From 9b26e5686fb06b292c0aac6481544adeb052a43c Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 4 Jul 2022 15:48:34 +0200 Subject: [PATCH 01/28] fixing iterator advancement in buildVector() --- include/graphblas/reference/vector.hpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index f0db908b2..e6167a868 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -478,18 +478,19 @@ namespace grb { // perform straight copy fwd_iterator it = start; - for( size_t i = 0; start != end && i < _coordinates.size(); ++i ) { + for( size_t i = 0; it != end && i < _coordinates.size(); ++i ) { // flag coordinate as assigned if( _coordinates.assign( i ) ) { if( descr & descriptors::no_duplicates ) { return ILLEGAL; } // nonzero already existed, so fold into existing one - foldl( _raw[ i ], *it++, dup ); + foldl( _raw[ i ], *it, dup ); } else { // new nonzero, so overwrite - _raw[ i ] = static_cast< D >( *it++ ); + _raw[ i ] = static_cast< D >( *it ); } + ++it; } // write back final position @@ -538,7 +539,9 @@ namespace grb { nnz_iterator nnz = nnz_start; ind_iterator ind = ind_start; while( nnz != nnz_end || ind != ind_end ) { - const size_t i = static_cast< size_t >( *ind++ ); + const size_t i = static_cast< size_t >( *ind ); + ++ind; + // sanity check if( i >= _coordinates.size() ) { return MISMATCH; @@ -547,10 +550,11 @@ namespace grb { if( descr & descriptors::no_duplicates ) { return ILLEGAL; } - foldl( _raw[ i ], *nnz++, dup ); + foldl( _raw[ i ], *nnz, dup ); } else { - _raw[ i ] = static_cast< D >( *nnz++ ); + _raw[ i ] = static_cast< D >( *nnz ); } + ++nnz; } // done From 56cd0f24e7851a0a9ce32c7644f3237f53897482 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 23 Sep 2022 11:53:19 +0200 Subject: [PATCH 02/28] multiple fixes to HPCG benchmark: fixing uninitialized variable; printing with printf: 
unique line with multiple threads/processes and max precision with floats; renaming option: max_iter -> max-iter; adding descriptors for major operations and removing check for the color mask --- include/graphblas/algorithms/hpcg/hpcg.hpp | 4 ++-- .../graphblas/algorithms/hpcg/multigrid_v_cycle.hpp | 6 +++--- .../algorithms/hpcg/red_black_gauss_seidel.hpp | 5 ++--- tests/smoke/hpcg.cpp | 13 ++++++++----- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index 6caf22a1c..eef0a3376 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -121,7 +121,7 @@ namespace grb { ret = ret ? ret : grb::set( p, 0 ); ret = ret ? ret : grb::set( p, x ); - ret = ret ? ret : grb::mxv( Ap, A, x, ring ); // Ap = A * x + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, ring ); // Ap = A * x assert( ret == SUCCESS ); ret = ret ? ret : grb::eWiseApply( r, b, Ap, minus ); // r = b - Ap; @@ -186,7 +186,7 @@ namespace grb { #endif ret = ret ? ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::mxv( Ap, A, p, ring ); // Ap = A * p; + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, ring ); // Ap = A * p; assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( Ap, "middle Ap" ); diff --git a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp b/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp index f40296f91..7541a387f 100644 --- a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp @@ -76,7 +76,7 @@ namespace grb { // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 // to *coarsening_data->system_size ret = ret ? ret : grb::set( coarsening_data.r, 0 ); - ret = ret ? ret : grb::mxv( coarsening_data.r, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, + ret = ret ? 
ret : grb::mxv< grb::descriptors::dense >( coarsening_data.r, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, ring ); // r = coarsening_matrix * Ax_finer return ret; } @@ -108,7 +108,7 @@ namespace grb { // to nrows(x_fine) ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); - ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, coarsening_data.z, ring ); + ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix | grb::descriptors::dense >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, coarsening_data.z, ring ); assert( ret == SUCCESS ); ret = ret ? ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; @@ -216,7 +216,7 @@ namespace grb { #endif ret = ret ? ret : grb::set( cd.Ax_finer, 0 ); - ret = ret ? ret : grb::mxv( cd.Ax_finer, data.A, data.z, ring ); + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( cd.Ax_finer, data.A, data.z, ring ); assert( ret == SUCCESS ); ret = ret ? 
ret : compute_coarsening( data.r, cd, ring, minus ); diff --git a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp index 718e5015c..6fdc3c9a3 100644 --- a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp @@ -29,7 +29,6 @@ #include - namespace grb { namespace algorithms { namespace internal { @@ -77,11 +76,11 @@ namespace grb { // nonetheless, it is left not to violate the semantics of RBGS in case also the false values // had been initialized (in which case the check is fundamental); if only true values were initialized, // we expect CPU branch prediction to neutralize the branch cost - if( color_mask[ i ] ) { + // if( color_mask[ i ] ) { IOType d = A_diagonal[ i ]; IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; x[ i ] = v / d; - } + // } }, color_mask, x, r, smoother_temp, A_diagonal ); assert( ret == SUCCESS ); diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index d84c157e0..5b34d9895 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -41,6 +41,8 @@ // here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests #ifdef HPCG_PRINT_STEPS +#include + // HPCG_PRINT_STEPS requires defining the following symbols /** @@ -176,15 +178,16 @@ template< typename T, class Ring = Semiring< grb::operators::add< T >, grb::operators::mul< T >, grb::identities::zero, grb::identities::one > > void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { - T norm; - RC ret = grb::dot( norm, r, r, ring ); // residual = r' * r; + T norm = 0; + RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; (void)ret; assert( ret == SUCCESS ); - std::cout << ">>> "; if( head != nullptr ) { std::cout << head << ": "; + printf(">>> %s: %lf\n", head, norm ); + } else { + printf(">>> %lf\n", norm ); } - std::cout << norm << std::endl; } #endif @@ -377,7 
+380,7 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration " by the minimum system dimension" ) .add_optional_argument( "--test-rep", sim_in.test_repetitions, grb::config::BENCHMARKING::inner(), "consecutive test repetitions before benchmarking" ) .add_optional_argument( "--init-iter", outer_iterations, grb::config::BENCHMARKING::outer(), "test repetitions with complete initialization" ) - .add_optional_argument( "--max_iter", sim_in.max_iterations, MAX_ITERATIONS_DEF, "maximum number of HPCG iterations" ) + .add_optional_argument( "--max-iter", sim_in.max_iterations, MAX_ITERATIONS_DEF, "maximum number of HPCG iterations" ) .add_optional_argument( "--max-residual-norm", max_residual_norm, MAX_NORM, "maximum norm for the residual to be acceptable (does NOT limit " "the execution of the algorithm)" ) From d2ad73a5e38b679a61c20e6a9d0921622aede26a Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Wed, 6 Apr 2022 14:55:47 +0200 Subject: [PATCH 03/28] adding geometry generator for N-dimensional systems, also with halo adding old files for testing little test adding test for generator iterators fixing test to be successful memory pre-allocation polishing geometry utils, using them to build HPCG input and adding test --- include/graphblas/algorithms/hpcg/hpcg.hpp | 20 +- .../algorithms/hpcg/matrix_building_utils.hpp | 110 ++- .../algorithms/hpcg/ndim_matrix_builders.hpp | 736 +++++++++--------- .../hpcg/old_matrix_building_utils.hpp | 173 ++++ .../hpcg/old_ndim_matrix_builders.hpp | 548 +++++++++++++ .../algorithms/hpcg/system_building_utils.hpp | 59 +- .../utils/geometry/array_vector_storage.hpp | 67 ++ .../utils/geometry/generic_vector_storage.hpp | 117 +++ .../linearized_halo_ndim_geometry.hpp | 232 ++++++ .../linearized_halo_ndim_iterator.hpp | 377 +++++++++ .../geometry/linearized_halo_ndim_system.hpp | 111 +++ .../geometry/linearized_ndim_iterator.hpp | 178 +++++ .../utils/geometry/linearized_ndim_system.hpp | 174 +++++ 
.../graphblas/utils/geometry/ndim_system.hpp | 69 ++ .../graphblas/utils/geometry/ndim_vector.hpp | 122 +++ tests/smoke/hpcg.cpp | 306 +++++++- 16 files changed, 2999 insertions(+), 400 deletions(-) create mode 100644 include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp create mode 100644 include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp create mode 100644 include/graphblas/utils/geometry/array_vector_storage.hpp create mode 100644 include/graphblas/utils/geometry/generic_vector_storage.hpp create mode 100644 include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp create mode 100644 include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp create mode 100644 include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp create mode 100644 include/graphblas/utils/geometry/linearized_ndim_iterator.hpp create mode 100644 include/graphblas/utils/geometry/linearized_ndim_system.hpp create mode 100644 include/graphblas/utils/geometry/ndim_system.hpp create mode 100644 include/graphblas/utils/geometry/ndim_vector.hpp diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index eef0a3376..492eb038d 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -31,6 +31,8 @@ #include "hpcg_data.hpp" #include "multigrid_v_cycle.hpp" +#include + namespace grb { namespace algorithms { @@ -102,6 +104,7 @@ namespace grb { const ResidualType tolerance, size_t &iterations, ResidualType &norm_residual, + bool print_iter_stats, const Ring &ring = Ring(), const Minus &minus = Minus() ) { @@ -139,6 +142,8 @@ namespace grb { ResidualType old_r_dot_z { 0.0 }, r_dot_z { 0.0 }, beta { 0.0 }; size_t iter { 0 }; + grb::utils::Timer timer; + #ifdef HPCG_PRINT_STEPS DBG_print_norm( p, "start p" ); DBG_print_norm( Ap, "start Ap" ); @@ -150,8 +155,17 @@ namespace grb { DBG_println( "========= iteration " << iter << " =========" ); #endif if( 
with_preconditioning ) { - ret = ret ? ret : internal::multi_grid( data, data.coarser_level, presmoother_steps, postsmoother_steps, ring, minus ); + if( print_iter_stats ) { + timer.reset(); + } + ret = ret ? ret : internal::multi_grid( data, data.coarser_level, + presmoother_steps, postsmoother_steps, ring, minus ); assert( ret == SUCCESS ); + if( print_iter_stats ) { + double duration = timer.time(); + std::cout << "iteration, pre-conditioner: " << iter << "," + << duration << std::endl; + } } else { ret = ret ? ret : grb::set( z, r ); // z = r; assert( ret == SUCCESS ); @@ -215,6 +229,10 @@ namespace grb { norm_residual = std::sqrt( norm_residual ); + if( print_iter_stats ) { + std::cout << "iteration, residual: " << iter << "," << norm_residual << std::endl; + } + ++iter; } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp index 1facabe49..2dfeabc49 100644 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp @@ -31,15 +31,65 @@ #include #include #include +#include #include #include "ndim_matrix_builders.hpp" +#define PAR + + + +#ifndef PAR +#include +#endif + + namespace grb { namespace algorithms { + template< typename T > void partition_nonzeroes( + T num_nonzeroes, + T& first_offset, + T& last_offset + ) { + const size_t num_procs{ spmd<>::nprocs() }; + const T per_process{ ( num_nonzeroes + num_procs - 1 ) / num_procs }; // round up + first_offset = std::min( per_process * static_cast< T >( spmd<>::pid() ), num_nonzeroes ); + last_offset = std::min( first_offset + per_process, num_nonzeroes ); + } + + template< typename IterT > void partition_iteration_range( + size_t num_nonzeroes, + IterT &begin, + IterT &end + ) { + assert( num_nonzeroes == static_cast< size_t >( end - begin ) ); + size_t 
first, last; + partition_nonzeroes( num_nonzeroes, first, last ); + if( last < num_nonzeroes ) { + end = begin; + end += last; + } + begin += first; + } + +#ifndef PAR + template< typename T > void partition_rows( + T rows, + T& first_row, + T& last_row + ) { + const size_t num_procs{ spmd<>::nprocs() }; + const T per_process{ ( rows + num_procs - 1 ) / num_procs }; // round up + first_row = std::min( per_process * static_cast< T >( spmd<>::pid() ), rows ); + last_row = std::min( first_row + per_process, rows ); + } +#endif + + /** * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. * @@ -60,16 +110,36 @@ namespace grb { template< std::size_t DIMS, typename T, enum grb::Backend B > grb::RC build_ndims_system_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & sys_sizes, std::size_t halo_size, T diag_value, T non_diag_value ) { static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; if( grb::nrows( M ) != n || grb::nrows( M ) != grb::ncols( M ) ) { throw std::invalid_argument( "wrong matrix dimensions: matrix should " "be square" " and in accordance with given system " "sizes" ); } - grb::algorithms::matrix_generator_iterator< DIMS, T > begin( sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); - grb::algorithms::matrix_generator_iterator< DIMS, T > end( sys_sizes, n, halo_size, diag_value, non_diag_value ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); +#ifdef PAR + using coord_t = unsigned; + if( n > std::numeric_limits< coord_t >::max() ) { + throw std::domain_error( "CoordT cannot store the matrix coordinates" ); + } + std::array< coord_t, DIMS > _sys_sizes; + for( size_t i = 0; i < DIMS; i++ ) _sys_sizes[i] = sys_sizes[i]; + grb::algorithms::hpcg_builder< DIMS, coord_t, T > 
hpcg_system( _sys_sizes, halo_size ); + grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > begin( + hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); + grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > end( + hpcg_system.make_end_iterator( diag_value, non_diag_value ) + ); + partition_iteration_range( hpcg_system.system_size(), begin, end ); + + // std::cout << "num nonzeroes " << ( end - begin ) << std::endl; +#else + size_t first_row, last_row; + partition_rows( n, first_row, last_row ); + grb::algorithms::old::matrix_generator_iterator< DIMS, T > begin( sys_sizes, first_row, halo_size, diag_value, non_diag_value ); + grb::algorithms::old::matrix_generator_iterator< DIMS, T > end( sys_sizes, last_row, halo_size, diag_value, non_diag_value ); +#endif + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } /** @@ -97,7 +167,7 @@ namespace grb { template< std::size_t DIMS, typename T, enum grb::Backend B > grb::RC build_ndims_coarsener_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & coarser_sizes, const std::array< std::size_t, DIMS > & finer_sizes ) { static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { std::size_t step = finer_sizes[ i ] / coarser_sizes[ i ]; if( step * coarser_sizes[ i ] != finer_sizes[ i ] ) { @@ -112,10 +182,32 @@ namespace grb { " with rows == " "and cols == " ); } - - grb::algorithms::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, 0 ); - grb::algorithms::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, rows ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); +#ifdef PAR + using coord_t = 
unsigned; + if( rows > std::numeric_limits< coord_t >::max() ) { + throw std::domain_error( "CoordT cannot store the row coordinates" ); + } + if( cols > std::numeric_limits< coord_t >::max() ) { + throw std::domain_error( "CoordT cannot store the column coordinates" ); + } + std::array< coord_t, DIMS > _coarser_sizes, _finer_sizes; + for( size_t i = 0; i < DIMS; i++ ) { + _coarser_sizes[i] = coarser_sizes[i]; + _finer_sizes[i] = finer_sizes[i]; + } + grb::algorithms::hpcg_coarsener_builder< DIMS, coord_t, T > coarsener( _coarser_sizes, _finer_sizes ); + grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, T > begin( coarsener.make_begin_iterator() ); + grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, T > end( + coarsener.make_end_iterator() + ); + partition_iteration_range( coarsener.system_size(), begin, end ); +#else + size_t first_row, last_row; + partition_rows( rows, first_row, last_row ); + grb::algorithms::old::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, first_row ); + grb::algorithms::old::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, last_row ); +#endif + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } /** diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp index c00eb65b2..06672d110 100644 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp @@ -44,200 +44,91 @@ #include #include #include +#include +#include +#include -namespace grb { +#include +#include +#include - namespace algorithms { - /** - * @brief Base class that iterates on DIMS dimensions starting from the first one. - * - * The coordinates are assumed to generate the row number in a matrix whose number of rows is - * the product of all sizes. 
This class generates row numbers for physical problems described as - * systems of linear equations in an n-dimensional space. - * - * Example of iterations in a 3D (x, y, z) system of size (4,3,2), with generated row numbers - * reported as '=> ROW': - * - z[0] - * - y[0] - * - x[0] => 0, x[1] => 1, x[2] => 2, x[3] => 3 - * - y[1] - * - x[0] => 4, x[1] => 5, x[2] => 6, x[3] => 7 - * - y[2] - * - x[0] => 8, x[1] => 9, x[2] => 10, x[3] => 11 - * - z[1] - * - y[0] - * - x[0] => 12, x[1] => 13, x[2] => 14, x[3] => 15 - * - y[1] - * - x[0] => 16, x[1] => 17, x[2] => 18, x[3] => 19 - * - y[2] - * - x[0] => 20, x[1] => 21, x[2] => 22, x[3] => 23 - * - * The main goal of this class is to be derived by other classes to generate matrices in an - * STL-iterator-fashion; hence, this class contains all the code for basic coordinate-to-row-column - * conversion in \p DIM dimensions and the basic logic to increment the row number. - * - * @tparam DIMS number os dimensions of the system - */ - template< std::size_t DIMS > - struct row_generator { - using RowIndexType = std::size_t; ///< numeric type of rows - using array_t = std::array< RowIndexType, - DIMS >; ///< type for the array storing the coordinates. +namespace grb { - const array_t physical_sizes; ///< size of each dimension, starting from the one to be explored first + namespace algorithms { - /** - * @brief Construct a new row generator object - * @param[in] _sizes array of sizes of each dimension; no dimension should be 0, otherwise an exception - * is thrown - * @param[in] first_row first row to iterate from; it is allowed to be beyond the matrix size, e.g. 
to create - * an end iterator (no check occurs) - */ - row_generator( const array_t & _sizes, RowIndexType first_row ) : physical_sizes( _sizes ) { - static_assert( DIMS > 0, "DIMS should be higher than 0" ); - for( const auto i : _sizes ) { - if( i == static_cast< RowIndexType >( 0U ) ) { - throw std::invalid_argument( "All dimension sizes must " - "be > 0" ); - } - } - row_to_coords( first_row ); - } + template< + size_t DIMS, + typename CoordT, + typename T + > + class hpcg_builder; + + template< + size_t DIMS, + typename CoordT, + typename T + > + struct matrix_generator_iterator { + + using RowIndexType = CoordT; ///< numeric type of rows + using ColumnIndexType = CoordT; + using ValueType = T; + friend hpcg_builder< DIMS, CoordT, T >; - row_generator( const row_generator & o ) = default; + using linear_system_t = grb::utils::geometry::linearized_halo_ndim_system< RowIndexType, DIMS >; + using __iter_t = typename linear_system_t::iterator; + using self_t = matrix_generator_iterator< DIMS, CoordT, T >; - row_generator( row_generator && o ) = default; + struct __value { - protected: - // x: row_coords[0], y: row_coords[1], z: row_coords[2], ... - array_t row_coords; ///< n-D coordinates from which to compute the row + friend self_t; - /** - * @brief converts a row number into a n-D coordinates according to the sizes in #physical_sizes - * - * In case the input is higher than the nunber of rows, the last coordinate is allowed to - * go beyond its physical size. E.g., if the system has size (4,3,2) and \p rowcol is 24, - * the coordinates are (0,0,3). 
- * - * @param[in] rowcol row number to convert; it can be any number - */ - void row_to_coords( RowIndexType rowcol ) { - std::size_t s = 1; - for( std::size_t i { 0 }; i < row_coords.size() - 1; i++ ) - s *= physical_sizes[ i ]; - - for( typename array_t::size_type i { row_coords.size() - 1 }; i > 0; i-- ) { - row_coords[ i ] = rowcol / s; - rowcol -= row_coords[ i ] * s; - s /= physical_sizes[ i ]; - } - row_coords[ 0 ] = rowcol % physical_sizes[ 0 ]; - } + __value( + ValueType diag, + ValueType non_diag, + RowIndexType i, + ColumnIndexType j + ) noexcept : + diagonal_value( diag ), + non_diagonal_value( non_diag ), + _i( i ), + _j( j ) + {} - /** - * @brief Pure function converting an array of coordinates into a row number, based on #physical_sizes. - * @param a the #array_t array of coordinates to convert - * @return #RowIndexType the row corresponding to the coordinates in \p a - */ - RowIndexType coords_to_rowcol( const array_t & a ) const { - RowIndexType row { 0 }; - RowIndexType s { 1 }; - for( typename array_t::size_type i { 0 }; i < a.size(); i++ ) { - row += s * a[ i ]; - s *= physical_sizes[ i ]; - } - return row; - } + __value( const __value & ) = default; - /** - * @brief Increment #row_coords in order to move to the next coordinate (according to the - * n-dimensional iteration order) and update #current_row accordingly. - * - * To be used by derived classes in order to generate the matrix, e.g. via the \c operator()++ - * operator prescribed for STL-like iterators. 
- */ - void increment_row() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & coord = row_coords[ i ]; - // must rewind dimension if we wrap-around - typename array_t::value_type new_coord = ( coord + 1 ) % physical_sizes[ i ]; - rewind = new_coord < coord; - coord = new_coord; - ++i; - } while( rewind && i < row_coords.size() - 1 ); // rewind only the first N-1 coordinates - - // if we still have to rewind, increment the last coordinate, which is unbounded - if( rewind ) { - row_coords.back()++; + __value & operator=( const __value & ) = default; + + inline RowIndexType i() const { return _i; } + inline ColumnIndexType j() const { return _j; } + inline ValueType v() const { + return j() == i() ? diagonal_value : non_diagonal_value; } - } - }; - // =============================================================== + private: + ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal + ValueType non_diagonal_value; ///< value to emit outside of the diagonal + RowIndexType _i; + ColumnIndexType _j; + }; - /** - * @brief STL-like iterable class to generate the values for a matrix by iterating in an n-dimensional - * space along the coordinates. - * - * For each \f$ X=(x0, x1, ...,xn) \f$ point of the underlying (n+1)-dimensional space, - * this class iterates through the points of the n-dimensional halo of radius \p halo around \f$ X \f$, - * generating the row number corresponding to \f$ X \f$ and the column number corresponding to - * each halo point. At each coordinate \code (row, col) \endcode generated this way, the corresponding matrix value - * being generated depends on whether \code row == col \endcode. 
- * - * @tparam DIMS number of dimensions of the system - * @tparam HALO halo size, determining the number of points to iterate around and thus the column coordinates - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T = double > - struct matrix_generator_iterator : public row_generator< DIMS > { + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = __value; + using pointer = value_type; + using reference = value_type; + using difference_type = typename __iter_t::difference_type; - using RowIndexType = typename row_generator< DIMS >::RowIndexType; - using ColumnIndexType = typename row_generator< DIMS >::RowIndexType; - using ValueType = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< RowIndexType, ColumnIndexType >, T >; - - // halo may in future become a DIM-size array to iterate in arbitrary shapes - const RowIndexType halo; ///< number of points per dimension to iterate around - const ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal - const ValueType non_diagonal_value; ///< value to emit outside of the diagonal + matrix_generator_iterator( const self_t & ) = default; - /** - * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row - * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. 
- * - * @param sizes array with the sizes along the dimensions - * @param row current row to initialize the matrix on - * @param _halo halo of points to iterate around; must be > 0 - * @param diag value to emit when on the diagonal - * @param non_diag value to emit outside the diagonal - */ - matrix_generator_iterator( const array_t & sizes, RowIndexType row, RowIndexType _halo, ValueType diag, ValueType non_diag ) : - row_generator< DIMS >( sizes, row ), halo( _halo ), diagonal_value( diag ), non_diagonal_value( non_diag ) { - if( halo <= 0 ) { - throw std::invalid_argument( "halo should be higher than 0" ); - } - for( const auto i : sizes ) { - if( i < static_cast< RowIndexType >( 2 * halo + 1 ) ) { - throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); - } - } - current_values.first.first = row; - update_column_max_values(); - reset_all_columns(); - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = v(); - } + matrix_generator_iterator( self_t && ) = default; - matrix_generator_iterator( const matrix_generator_iterator & o ) = default; + self_t & operator=( const self_t & ) = default; - matrix_generator_iterator( matrix_generator_iterator && o ) = default; + self_t & operator=( self_t && ) = default; /** * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. 
@@ -248,22 +139,22 @@ namespace grb { * * @return matrix_generator_iterator& \c this object, with the updated state */ - matrix_generator_iterator< DIMS, T > & operator++() { - bool must_rewind = increment_column(); - if( must_rewind ) { - this->increment_row(); - // after changing row, we must find the first non-zero column - reset_all_columns(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - update_column_max_values(); - } - // trigger column update after row update, as a row update - // triggers a column update - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = this->v(); + self_t & operator++() noexcept { + (void) ++_sys_iter; + update_coords(); return *this; } + self_t & operator+=( size_t offset ) { + _sys_iter += offset; + update_coords(); + return *this; + } + + difference_type operator-( const self_t &other ) const { + return this->_sys_iter - other._sys_iter; + } + /** * @brief Operator to compare \c this against \p o and return whether they differ. * @@ -271,11 +162,8 @@ namespace grb { * @return true of the row or the column is different between \p o and \c this * @return false if both row and column of \p o and \c this are equal */ - bool operator!=( const matrix_generator_iterator< DIMS, T > & o ) const { - if( o.i() != this->i() ) { - return true; - } - return o.j() != this->j(); + bool operator!=( const self_t &o ) const { + return this->_sys_iter != o._sys_iter; } /** @@ -285,8 +173,8 @@ namespace grb { * @return true of the row or the column is different between \p o and \c this * @return false if both row and column of \p o and \c this are equal */ - bool operator==( const matrix_generator_iterator< DIMS, T > & o ) const { - return o.i() == this->i() && o.j() == this->j(); + bool operator==( const self_t &o ) const { + return ! 
operator!=( o ); } /** @@ -295,22 +183,26 @@ namespace grb { * Useful when building the matrix by copying the triple of coordinates and value, * like for the BSP1D backend. */ - const value_type & operator*() const { - return current_values; + reference operator*() const { + return _val; + } + + pointer operator->() const { + return &_val; } /** * @brief Returns current row. */ inline RowIndexType i() const { - return current_values.first.first; + return _val.i(); } /** * @brief Returns current column. */ inline ColumnIndexType j() const { - return current_values.first.second; + return _val.j(); } /** @@ -320,80 +212,143 @@ namespace grb { * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise */ inline ValueType v() const { - return j() == i() ? diagonal_value : non_diagonal_value; + return _val.v(); } private: - // offsets w.r.t. rows - array_t col_coords; ///< coordinates corresponding to current column - array_t column_max_values; ///< maximum values for the column coordinates, to stop column increment - //// and reset the column coordinates - value_type current_values; ///< triple storing the current value for row, column and matrix element + value_type _val; + const linear_system_t *_lin_system; + __iter_t _sys_iter; /** - * @brief Updates the maximum values each column coordinate can reach, according to the row coordinates. + * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row + * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. * - * To be called after each row coordinates update. 
+ * @param sizes array with the sizes along the dimensions + * @param _halo halo of points to iterate around; must be > 0 + * @param diag value to emit when on the diagonal + * @param non_diag value to emit outside the diagonal */ - void update_column_max_values() { - for( std::size_t i { 0 }; i < column_max_values.size(); i++ ) { - column_max_values[ i ] = std::min( this->physical_sizes[ i ] - 1, this->row_coords[ i ] + halo ); - } + matrix_generator_iterator( + const linear_system_t &system, + ValueType diag, + ValueType non_diag + ) noexcept : + _val( diag, non_diag, 0, 0 ), + _lin_system( &system ), + _sys_iter( system.begin() ) + { + update_coords(); } - /** - * @brief Resets the value of column dimension \p dim to the first possible value. - * - * The final value of #col_coords[dim] depends on the current row (#row_coords) and on the \p halo - * and is \f$ max(0, \f$ #row_coords \f$[dim])\f$. - * - * @param dim the dimension to reset - */ - void reset_column_coords( std::size_t dim ) { - // cannot use std::max because row_coords is unsigned and can wrap-around - col_coords[ dim ] = this->row_coords[ dim ] <= halo ? 0 : ( this->row_coords[ dim ] - halo ); + void update_coords() { + _val._i = _sys_iter->get_element_linear(); + _val._j = _sys_iter->get_neighbor_linear(); } + }; - /** - * @brief resets all values in #col_coords to the initial coordinates, - * iterating from on the current row. 
- */ - void reset_all_columns() { - for( std::size_t i { 0 }; i < col_coords.size(); i++ ) { - reset_column_coords( i ); + + template< + size_t DIMS, + typename CoordT, + typename T + > + class hpcg_builder { + + using system_t = grb::utils::geometry::linearized_halo_ndim_system< CoordT, DIMS >; + + system_t system; + // const grb::utils::geometry::linearized_halo_ndim_system< CoordT, DIMS > system; + const CoordT halo; + + public: + + using hpcg_sys_iterator = matrix_generator_iterator< DIMS, CoordT, T >; + + hpcg_builder( + const std::array< CoordT, DIMS > &sizes, + CoordT _halo + ) : + system( sizes, _halo ), + halo( _halo ) + { + if( _halo <= 0 ) { + throw std::invalid_argument( "halo should be higher than 0" ); + } + for( const auto i : sizes ) { + if( i < 2 * _halo + 1 ) { + throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); + } } } - /** - * @brief Increment the column according to the iteration order, thus resetting the column coordinates - * when the last possible column value for the current row has been reached. - * - * @return true if the column coordinates have been reset, and thus also the row must be incremented - * @return false if the column coordinates - */ - bool increment_column() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & col = col_coords[ i ]; - // must rewind dimension if the column offset is already at the max value - // or if the column coordinates are already at the max value - rewind = ( col == column_max_values[ i ] ); - if( rewind ) { - // col = this->row_coords[i] == 0 ? 
0 : this->row_coords[i] - (halo); - reset_column_coords( i ); - } else { - ++col; - } - ++i; - } while( rewind && i < col_coords.size() ); + hpcg_builder( const hpcg_builder< DIMS, CoordT, T> & ) = delete; + + hpcg_builder( hpcg_builder< DIMS, CoordT, T> && ) = delete; + + hpcg_builder< DIMS, CoordT, T> & operator=( const hpcg_builder< DIMS, CoordT, T> & ) = delete; + + hpcg_builder< DIMS, CoordT, T> & operator=( hpcg_builder< DIMS, CoordT, T> && ) = delete; + + size_t system_size() const { + return system.halo_system_size(); + } + + hpcg_sys_iterator make_begin_iterator( + T diag, + T non_diag + ) { + return hpcg_sys_iterator( system, diag, non_diag ); + } - // if we change z, then we also must reset x and y; if only y, we must reset x, and so on - return rewind; + hpcg_sys_iterator make_end_iterator( + T diag, + T non_diag + ) { + hpcg_sys_iterator result( system, diag, non_diag ); + result += system_size() - 1; // do not trigger boundary checks + ++result; + return result; } + }; - // =============================================================== + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + template< + size_t DIMS, + typename CoordT, + typename T + > + class hpcg_coarsener_builder; + /** * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. 
@@ -408,56 +363,66 @@ namespace grb { * @tparam DIMS number of dimensions of the system * @tparam T type of matrix values */ - template< std::size_t DIMS, typename T = double > - struct coarsener_generator_iterator : public row_generator< DIMS > { + template< + size_t DIMS, + typename CoordT, + typename T + > + struct coarsener_generator_iterator { + + friend hpcg_coarsener_builder< DIMS, CoordT, T >; - using RowIndexType = typename row_generator< DIMS >::RowIndexType; - using ColumnIndexType = typename row_generator< DIMS >::RowIndexType; + using RowIndexType = CoordT; ///< numeric type of rows + using ColumnIndexType = CoordT; using ValueType = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< RowIndexType, ColumnIndexType >, T >; - // the sizes to project from - const array_t finer_sizes; ///< the size of the finer system (columns) - array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes + using lin_system_t = grb::utils::geometry::linearized_ndim_system< CoordT, + grb::utils::geometry::array_vector_storage< CoordT, DIMS > >; + using __iter_t = typename lin_system_t::iterator; + using self_t = coarsener_generator_iterator< DIMS, CoordT, T >; + using array_t = std::array< CoordT, DIMS >; - /** - * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, - * setting its row at \p _current_row and the column at the corresponding value. - * - * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the - * construction will throw an exception. 
- * - * @param _coarser_sizes sizes of the coarser system (rows) - * @param _finer_sizes sizes of the finer system (columns) - * @param _current_row row (in the coarser system) to set the iterator on - */ - coarsener_generator_iterator( const array_t & _coarser_sizes, const array_t & _finer_sizes, RowIndexType _current_row ) : - row_generator< DIMS >( _coarser_sizes, _current_row ), finer_sizes( _finer_sizes ), steps( { 0 } ) { - for( std::size_t i { 0 }; i < DIMS; i++ ) { - // finer size MUST be an exact multiple of coarser_size - typename array_t::value_type step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; - if( step == 0 || finer_sizes[ i ] / step != this->physical_sizes[ i ] ) { - throw std::invalid_argument( std::string( "finer size " - "of " - "dimension" - " " ) + - std::to_string( i ) + - std::string( "is not an exact multiple of coarser " - "size" ) ); - } - steps[ i ] = step; + struct __value { + + friend self_t; + + __value( + RowIndexType i, + ColumnIndexType j + ) noexcept : + _i( i ), + _j( j ) + {} + + __value( const __value & ) = default; + + __value & operator=( const __value & ) = default; + + inline RowIndexType i() const { return _i; } + inline ColumnIndexType j() const { return _j; } + inline ValueType v() const { + return static_cast< ValueType >( 1 ); } - current_values.first.first = _current_row; - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); - } - coarsener_generator_iterator( const coarsener_generator_iterator & o ) = default; + private: + RowIndexType _i; + ColumnIndexType _j; + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = __value; + using pointer = const value_type; + using reference = const value_type&; + using difference_type = typename __iter_t::difference_type; + + coarsener_generator_iterator( const self_t & o ) = default; + + coarsener_generator_iterator( self_t && o ) = default; - 
coarsener_generator_iterator( coarsener_generator_iterator && o ) = default; + self_t & operator=( const self_t & ) = default; + + self_t & operator=( self_t && ) = default; /** * @brief Increments the row and the column according to the respective physical sizes, @@ -465,29 +430,34 @@ namespace grb { * * @return \code *this \endcode, i.e. the same object with the updates row and column */ - coarsener_generator_iterator< DIMS, T > & operator++() { - this->increment_row(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); + self_t & operator++() noexcept { + (void) ++_sys_iter; + update_coords(); + return *this; + } + + self_t & operator+=( size_t offset ) { + _sys_iter += offset; + update_coords(); return *this; } + difference_type operator-( const self_t &o ) const { + return this->_sys_iter - o._sys_iter; + } + /** * @brief Returns whether \c this and \p o differ. */ - bool operator!=( const coarsener_generator_iterator< DIMS, T > & o ) const { - if( this->i() != o.i() ) { - return true; - } - return this->j() != o.j(); + bool operator!=( const self_t &o ) const { + return this->_sys_iter != o._sys_iter; } /** * @brief Returns whether \c this and \p o are equal. */ - bool operator==( const coarsener_generator_iterator< DIMS, T > & o ) const { - return this->i() == o.i() && this->j() == o.j(); + bool operator==( const self_t &o ) const { + return ! this->operator!=( o ); } /** @@ -496,101 +466,151 @@ namespace grb { * Useful when building the matrix by copying the triple of coordinates and value, * like for the BSP1D backend. */ - const value_type & operator*() const { - return current_values; + reference operator*() const { + return _val; + } + + pointer operator->() const { + return &_val; } /** * @brief Returns the current row, according to the coarser system. 
*/ inline RowIndexType i() const { - return current_values.first.first; + return _val.i(); } /** * @brief Returns the current column, according to the finer system. */ inline ColumnIndexType j() const { - return current_values.first.second; + return _val.j(); } /** * @brief Returns always 1, as the coarsening keeps the same value. */ inline ValueType v() const { - return static_cast< ValueType >( 1 ); + return _val.v(); } private: - value_type current_values; ///< triple storing the current value for row, column and matrix element + //// incremented when incrementing the row coordinates; is is the ration between + //// #finer_sizes and row_generator#physical_sizes + const lin_system_t *_lin_sys; + const array_t *_steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be + __iter_t _sys_iter; + value_type _val; + + /** + * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, + * setting its row at \p _current_row and the column at the corresponding value. + * + * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the + * construction will throw an exception. + * + * @param _coarser_sizes sizes of the coarser system (rows) + * @param _finer_sizes sizes of the finer system (columns) + * @param _current_row row (in the coarser system) to set the iterator on + */ + coarsener_generator_iterator( + const lin_system_t &system, + const array_t &steps + ) noexcept : + _lin_sys( &system ), + _steps( &steps ), + _sys_iter( _lin_sys->begin() ), + _val(0, 0) + { + update_coords(); + } + + void update_coords() noexcept { + _val._i = _sys_iter->get_linear_position(); + _val._j = coarse_rows_to_finer_col(); + } /** * @brief Returns the row coordinates converted to the finer system, to compute * the column value. 
*/ - ColumnIndexType coords_to_finer_col() const { - ColumnIndexType row { 0 }; + ColumnIndexType coarse_rows_to_finer_col() const noexcept { + ColumnIndexType finer { 0 }; ColumnIndexType s { 1 }; - for( typename array_t::size_type i { 0 }; i < this->row_coords.size(); i++ ) { - s *= steps[ i ]; - row += s * this->row_coords[ i ]; - s *= this->physical_sizes[ i ]; + for( size_t i { 0 }; i < DIMS; i++ ) { + s *= (*_steps)[ i ]; + finer += s * _sys_iter->get_position()[ i ]; + s *= _lin_sys->get_sizes()[ i ]; } - return row; + return finer; } }; - } // end namespace algorithms -} // end namespace grb + template< + size_t DIMS, + typename CoordT, + typename T + > + class hpcg_coarsener_builder { + public: -namespace std { + using array_t = std::array< CoordT, DIMS >; + using hpcg_coarsener_iterator = coarsener_generator_iterator< DIMS, CoordT, T >; - /** - * Specialises the standard STL iterator traits for - * #grb::algorithms::matrix_generator_iterator - */ - template< size_t DIMS, typename T > - class iterator_traits< - grb::algorithms::matrix_generator_iterator< DIMS, T > - > { + hpcg_coarsener_builder( + const array_t &_coarser_sizes, + const array_t &_finer_sizes + ) : system( _coarser_sizes.begin(), _coarser_sizes.end() ) { + for( size_t i { 0 }; i < DIMS; i++ ) { + // finer size MUST be an exact multiple of coarser_size + size_t step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; + if( step == 0 || _finer_sizes[ i ] / step != _coarser_sizes[ i ] ) { + throw std::invalid_argument( + std::string( "finer size of dimension " ) + std::to_string( i ) + + std::string( "is not an exact multiple of coarser size" ) + ); + } + steps[ i ] = step; + } + } - private: + hpcg_coarsener_builder( const hpcg_coarsener_builder< DIMS, CoordT, T> & ) = delete; - typedef grb::algorithms::matrix_generator_iterator< DIMS, T > SelfType; + hpcg_coarsener_builder( hpcg_coarsener_builder< DIMS, CoordT, T> && ) = delete; + hpcg_coarsener_builder< DIMS, CoordT, T> & operator=( const 
hpcg_coarsener_builder< DIMS, CoordT, T> & ) = delete; - public: + hpcg_coarsener_builder< DIMS, CoordT, T> & operator=( hpcg_coarsener_builder< DIMS, CoordT, T> && ) = delete; - typedef typename SelfType::ValueType value_type; - typedef const value_type * pointer; - typedef const value_type & reference; - typedef size_t difference_type; - typedef forward_iterator_tag iterator_category; + size_t system_size() const { + return system.system_size(); + } - }; + hpcg_coarsener_iterator make_begin_iterator() { + return hpcg_coarsener_iterator( system, steps ); + } - template< size_t DIMS, typename T > - class iterator_traits< - grb::algorithms::coarsener_generator_iterator< DIMS, T > - > { + hpcg_coarsener_iterator make_end_iterator() { + hpcg_coarsener_iterator result( system, steps ); + result += system_size() - 1; // do not trigger boundary checks + ++result; + return result; + } private: + const grb::utils::geometry::linearized_ndim_system< CoordT, + grb::utils::geometry::array_vector_storage< CoordT, DIMS > > system; - typedef grb::algorithms::coarsener_generator_iterator< DIMS, T > SelfType; - - - public: - - typedef typename SelfType::ValueType value_type; - typedef const value_type * pointer; - typedef const value_type & reference; - typedef size_t difference_type; - typedef forward_iterator_tag iterator_category; + array_t steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be + //// incremented when incrementing the row coordinates; is is the ration between + //// #finer_sizes and row_generator#physical_sizes + }; - }; -} // end namespace std + } // namespace algorithms +} // namespace grb #endif // _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS diff --git a/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp new file mode 100644 index 000000000..9bb5c7a95 --- /dev/null +++ b/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp @@ -0,0 +1,173 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hpcg_matrix_building_utils.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Utilities to build the matrices for HPCG simulations in an arbitrary number of dimensions. + * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_OLD_MATRIX_BUILDING_UTILS +#define _H_GRB_ALGORITHMS_OLD_MATRIX_BUILDING_UTILS + +#include +#include +#include +#include +#include +#include + +#include + +#include "old_ndim_matrix_builders.hpp" + + +namespace grb { + namespace algorithms { + namespace old { + + + /** + * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. 
+ * + * This routine initializes \p M to a matrix representing a \p DIMS -dimensions system of sizes + * \p sys_sizes, with an iteration halo of size \p halo_size . The matrix diagonal values are initialized + * to \p diag_value while the other non-zero values are initialized to \p non_diag_value . + * + * @tparam DIMS system dimensions + * @tparam T type of matrix values + * @tparam B matrix GraphBLAS backend + * @param M the matrix to be initialized; it must be already constructed + * @param sys_sizes the sizes of the physical system + * @param halo_size the size of the halo of point to iterate in + * @param diag_value diagonal value + * @param non_diag_value value outside of the diagonal + * @return grb::RC the success value returned when trying to build the matrix + */ + template< std::size_t DIMS, typename T, enum grb::Backend B > + grb::RC build_ndims_system_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & sys_sizes, std::size_t halo_size, T diag_value, T non_diag_value ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); + std::size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + if( grb::nrows( M ) != n || grb::nrows( M ) != grb::ncols( M ) ) { + throw std::invalid_argument( "wrong matrix dimensions: matrix should " + "be square" + " and in accordance with given system " + "sizes" ); + } + grb::algorithms::matrix_generator_iterator< DIMS, T > begin( sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); + grb::algorithms::matrix_generator_iterator< DIMS, T > end( sys_sizes, n, halo_size, diag_value, non_diag_value ); + return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); + } + + /** + * @brief Builds a coarsener matrix for an HPCG simulation. 
+ * + * It initializes \p M as a rectangular matrix, with rows corresponding to the coarser system + * (of dimensions \p coarser_sizes - output) and columns corresponding to the finer system + * (of dimensions \p finer_sizes - input). The resulting coarsening matrix takes in input the finer system + * and coarsens it by keeping one element every \a S , where \a S is the ratio between the finer and + * the coarser dimension (computed for each dimension). In this way each \p DIMS -dimensional finer element + * corresponds to its bounding coarser element. + * + * For the coarsening to be feasible, the sizes of the finer system \b must be a multiple of those of the + * coarser system. If this condition is not met, an exception is thrown. + * + * @tparam DIMS system dimensions + * @tparam T type of matrix values + * @tparam B matrix GraphBLAS backend + * @param M the matrix to be initialized; it must be already constructed with proper dimensions + * @param coarser_sizes sizes of the coarser system + * @param finer_sizes sizes of the finer system; each one \b must be a multiple of the corresponding value + * in \p coarser_size , otherwise an exception is thrown + * @return grb::RC the success value returned when trying to build the matrix + */ + template< std::size_t DIMS, typename T, enum grb::Backend B > + grb::RC build_ndims_coarsener_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & coarser_sizes, const std::array< std::size_t, DIMS > & finer_sizes ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); + std::size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { + std::size_t step = finer_sizes[ i ] / coarser_sizes[ i ]; + if( step * coarser_sizes[ i ] != finer_sizes[ i ] ) { + throw std::invalid_argument( "finer sizes should be a multiple of " + "coarser sizes" ); + } + } + std::size_t const cols { 
std::accumulate( finer_sizes.cbegin(), finer_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { + throw std::invalid_argument( "wrong matrix dimensions: matrix should " + "be rectangular" + " with rows == " + "and cols == " ); + } + + grb::algorithms::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, 0 ); + grb::algorithms::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, rows ); + return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); + } + + /** + * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . + * + * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array + * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, + * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, + * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. + * + * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated + * only with the \c true values, leading to sparse vectors. This saves on storage space and allows + * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. + * + * @tparam B GraphBLAS backend for the vector + * @param masks output vector of color masks + * @param matrix_size size of the system matrix + * @param colors numbers of colors masks to build; it must be < \p matrix_size + * @return grb::RC the success value returned when trying to build the vector + */ + template< enum grb::Backend B > + grb::RC build_static_color_masks( std::vector< grb::Vector< bool, B > > & masks, std::size_t matrix_size, std::size_t colors ) { + if( ! 
masks.empty() ) { + throw std::invalid_argument( "vector of masks is expected to be " + "empty" ); + } + if( matrix_size < colors ) { + throw std::invalid_argument( "syztem size is < number of colors: too " + "small" ); + } + grb::RC rc { grb::SUCCESS }; + masks.reserve( colors ); + for( std::size_t i { 0U }; i < colors; i++ ) { + // build in-place, assuming the compiler deduces the right constructor according to B + masks.emplace_back( matrix_size ); + grb::Vector< bool > & mask = masks.back(); + // grb::set(mask, false); // DO NOT initialize false's explicitly, otherwise + // RBGS will touch them too and the runtime will increase! + for( std::size_t j = i; j < matrix_size; j += colors ) { + rc = grb::setElement( mask, true, j ); + assert( rc == grb::SUCCESS ); + if( rc != grb::SUCCESS ) + return rc; + } + } + return rc; + } + + } //namespace old + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS diff --git a/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp new file mode 100644 index 000000000..256995b02 --- /dev/null +++ b/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp @@ -0,0 +1,548 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file ndim_matrix_builders.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions + * + * In particular, the main matrices are: + * - a system matrix, generated from an N-dimenional space of coordinates by iterating along + * each dimension in priority order, where the first dimension has highest priority and the last + * dimension least priority; for each point (row), all its N-dimensional neighbours within + * a given distance are generated for the column + * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting + * each point to a corresponding system of finer sizes + * + * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_OLD_NDIM_MATRIX_BUILDERS +#define _H_GRB_ALGORITHMS_OLD_NDIM_MATRIX_BUILDERS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace grb { + namespace algorithms { + namespace old { + + /** + * @brief Base class that iterates on DIMS dimensions starting from the first one. + * + * The coordinates are assumed to generate the row number in a matrix whose number of rows is + * the product of all sizes. This class generates row numbers for physical problems described as + * systems of linear equations in an n-dimensional space. 
+ * + * Example of iterations in a 3D (x, y, z) system of size (4,3,2), with generated row numbers + * reported as '=> ROW': + * - z[0] + * - y[0] + * - x[0] => 0, x[1] => 1, x[2] => 2, x[3] => 3 + * - y[1] + * - x[0] => 4, x[1] => 5, x[2] => 6, x[3] => 7 + * - y[2] + * - x[0] => 8, x[1] => 9, x[2] => 10, x[3] => 11 + * - z[1] + * - y[0] + * - x[0] => 12, x[1] => 13, x[2] => 14, x[3] => 15 + * - y[1] + * - x[0] => 16, x[1] => 17, x[2] => 18, x[3] => 19 + * - y[2] + * - x[0] => 20, x[1] => 21, x[2] => 22, x[3] => 23 + * + * The main goal of this class is to be derived by other classes to generate matrices in an + * STL-iterator-fashion; hence, this class contains all the code for basic coordinate-to-row-column + * conversion in \p DIM dimensions and the basic logic to increment the row number. + * + * @tparam DIMS number os dimensions of the system + */ + template< std::size_t DIMS > + struct row_generator { + + using row_coordinate_type = std::size_t; ///< numeric type of rows + using array_t = std::array< row_coordinate_type, + DIMS >; ///< type for the array storing the coordinates. + + const array_t physical_sizes; ///< size of each dimension, starting from the one to be explored first + + /** + * @brief Construct a new row generator object + * @param[in] _sizes array of sizes of each dimension; no dimension should be 0, otherwise an exception + * is thrown + * @param[in] first_row first row to iterate from; it is allowed to be beyond the matrix size, e.g. 
to create + * an end iterator (no check occurs) + */ + row_generator( const array_t & _sizes, row_coordinate_type first_row ) : physical_sizes( _sizes ) { + static_assert( DIMS > 0, "DIMS should be higher than 0" ); + for( const auto i : _sizes ) { + if( i == static_cast< row_coordinate_type >( 0U ) ) { + throw std::invalid_argument( "All dimension sizes must " + "be > 0" ); + } + } + row_to_coords( first_row ); + } + + row_generator( const row_generator & o ) = default; + + row_generator( row_generator && o ) = default; + + protected: + // x: row_coords[0], y: row_coords[1], z: row_coords[2], ... + array_t row_coords; ///< n-D coordinates from which to compute the row + + /** + * @brief converts a row number into a n-D coordinates according to the sizes in #physical_sizes + * + * In case the input is higher than the nunber of rows, the last coordinate is allowed to + * go beyond its physical size. E.g., if the system has size (4,3,2) and \p rowcol is 24, + * the coordinates are (0,0,3). + * + * @param[in] rowcol row number to convert; it can be any number + */ + void row_to_coords( row_coordinate_type rowcol ) { + std::size_t s = 1; + for( std::size_t i { 0 }; i < row_coords.size() - 1; i++ ) + s *= physical_sizes[ i ]; + + for( typename array_t::size_type i { row_coords.size() - 1 }; i > 0; i-- ) { + row_coords[ i ] = rowcol / s; + rowcol -= row_coords[ i ] * s; + s /= physical_sizes[ i ]; + } + row_coords[ 0 ] = rowcol % physical_sizes[ 0 ]; + } + + /** + * @brief Pure function converting an array of coordinates into a row number, based on #physical_sizes. 
+ * @param a the #array_t array of coordinates to convert + * @return #row_coordinate_type the row corresponding to the coordinates in \p a + */ + row_coordinate_type coords_to_rowcol( const array_t & a ) const { + row_coordinate_type row { 0 }; + row_coordinate_type s { 1 }; + for( typename array_t::size_type i { 0 }; i < a.size(); i++ ) { + row += s * a[ i ]; + s *= physical_sizes[ i ]; + } + return row; + } + + /** + * @brief Increment #row_coords in order to move to the next coordinate (according to the + * n-dimensional iteration order) and update #current_row accordingly. + * + * To be used by derived classes in order to generate the matrix, e.g. via the \c operator()++ + * operator prescribed for STL-like iterators. + */ + void increment_row() { + bool rewind; + typename array_t::size_type i { 0 }; + do { + typename array_t::value_type & coord = row_coords[ i ]; + // must rewind dimension if we wrap-around + typename array_t::value_type new_coord = ( coord + 1 ) % physical_sizes[ i ]; + rewind = new_coord < coord; + coord = new_coord; + ++i; + } while( rewind && i < row_coords.size() - 1 ); // rewind only the first N-1 coordinates + + // if we still have to rewind, increment the last coordinate, which is unbounded + if( rewind ) { + row_coords.back()++; + } + } + }; + + // =============================================================== + + /** + * @brief STL-like iterable class to generate the values for a matrix by iterating in an n-dimensional + * space along the coordinates. + * + * For each \f$ X=(x0, x1, ...,xn) \f$ point of the underlying (n+1)-dimensional space, + * this class iterates through the points of the n-dimensional halo of radius \p halo around \f$ X \f$, + * generating the row number corresponding to \f$ X \f$ and the column number corresponding to + * each halo point. At each coordinate \code (row, col) \endcode generated this way, the corresponding matrix value + * being generated depends on whether \code row == col \endcode. 
+ * + * @tparam DIMS number of dimensions of the system + * @tparam HALO halo size, determining the number of points to iterate around and thus the column coordinates + * @tparam T type of matrix values + */ + template< std::size_t DIMS, typename T = double > + struct matrix_generator_iterator : public row_generator< DIMS > { + + using row_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; + using column_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; + using nonzero_value_type = T; + using array_t = typename row_generator< DIMS >::array_t; + using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T >; + + // halo may in future become a DIM-size array to iterate in arbitrary shapes + const row_coordinate_type halo; ///< number of points per dimension to iterate around + const nonzero_value_type diagonal_value; ///< value to be emitted when the object has moved to the diagonal + const nonzero_value_type non_diagonal_value; ///< value to emit outside of the diagonal + + /** + * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row + * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise.
+ * + * @param sizes array with the sizes along the dimensions + * @param row current row to initialize the matrix on + * @param _halo halo of points to iterate around; must be > 0 + * @param diag value to emit when on the diagonal + * @param non_diag value to emit outside the diagonal + */ + matrix_generator_iterator( const array_t & sizes, row_coordinate_type row, row_coordinate_type _halo, nonzero_value_type diag, nonzero_value_type non_diag ) : + row_generator< DIMS >( sizes, row ), halo( _halo ), diagonal_value( diag ), non_diagonal_value( non_diag ) { + if( halo <= 0 ) { + throw std::invalid_argument( "halo should be higher than " + "0" ); + } + for( const auto i : sizes ) { + if( i < static_cast< row_coordinate_type >( 2 * halo + 1 ) ) { + throw std::invalid_argument( "Iteration halo goes " + "beyond system sizes" ); + } + } + current_values.first.first = row; + update_column_max_values(); + reset_all_columns(); + current_values.first.second = this->coords_to_rowcol( col_coords ); + current_values.second = v(); + } + + matrix_generator_iterator( const matrix_generator_iterator & o ) = default; + + matrix_generator_iterator( matrix_generator_iterator && o ) = default; + + /** + * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. + * + * This operator internally increments the columns coordinates until wrap-around, when it increments + * the row coordinates and resets the column coordinates to the first possible column; this column coordinate + * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo.
+ * + * @return matrix_generator_iterator& \c this object, with the updated state + */ + matrix_generator_iterator< DIMS, T > & operator++() { + bool must_rewind = increment_column(); + if( must_rewind ) { + this->increment_row(); + // after changing row, we must find the first non-zero column + reset_all_columns(); + current_values.first.first = this->coords_to_rowcol( this->row_coords ); + update_column_max_values(); + } + // trigger column update after row update, as a row update + // triggers a column update + current_values.first.second = this->coords_to_rowcol( col_coords ); + current_values.second = this->v(); + return *this; + } + + /** + * @brief Operator to compare \c this against \p o and return whether they differ. + * + * @param o object to compare \c this against + * @return true if the row or the column is different between \p o and \c this + * @return false if both row and column of \p o and \c this are equal + */ + bool operator!=( const matrix_generator_iterator< DIMS, T > & o ) const { + if( o.i() != this->i() ) { + return true; + } + return o.j() != this->j(); + } + + /** + * @brief Operator to compare \c this against \p o and return whether they are equal. + * + * @param o object to compare \c this against + * @return true if both row and column of \p o and \c this are equal + * @return false if the row or the column is different between \p o and \c this + */ + bool operator==( const matrix_generator_iterator< DIMS, T > & o ) const { + return o.i() == this->i() && o.j() == this->j(); + } + + /** + * @brief Operator returning the triple to directly access row, column and element values. + * + * Useful when building the matrix by copying the triple of coordinates and value, + * like for the BSP1D backend. + */ + const value_type & operator*() const { + return current_values; + } + + /** + * @brief Returns current row. + */ + inline row_coordinate_type i() const { + return current_values.first.first; + } + + /** + * @brief Returns current column.
+ */ + inline column_coordinate_type j() const { + return current_values.first.second; + } + + /** + * @brief Returns the current matrix value. + * + * @return nonzero_value_type #diagonal_value if \code row == column \endcode (i.e. if \code this-> \endcode + * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise + */ + inline nonzero_value_type v() const { + return j() == i() ? diagonal_value : non_diagonal_value; + } + + private: + // offsets w.r.t. rows + array_t col_coords; ///< coordinates corresponding to current column + array_t column_max_values; ///< maximum values for the column coordinates, to stop column increment + //// and reset the column coordinates + value_type current_values; ///< triple storing the current value for row, column and matrix element + + /** + * @brief Updates the maximum values each column coordinate can reach, according to the row coordinates. + * + * To be called after each row coordinates update. + */ + void update_column_max_values() { + for( std::size_t i { 0 }; i < column_max_values.size(); i++ ) { + column_max_values[ i ] = std::min( this->physical_sizes[ i ] - 1, this->row_coords[ i ] + halo ); + } + } + + /** + * @brief Resets the value of column dimension \p dim to the first possible value. + * + * The final value of #col_coords[dim] depends on the current row (#row_coords) and on the \p halo + * and is \f$ max(0, \f$ #row_coords \f$[dim] - halo)\f$. + * + * @param dim the dimension to reset + */ + void reset_column_coords( std::size_t dim ) { + // cannot use std::max because row_coords is unsigned and can wrap-around + col_coords[ dim ] = this->row_coords[ dim ] <= halo ? 0 : ( this->row_coords[ dim ] - halo ); + } + + /** + * @brief Resets all values in #col_coords to the initial coordinates, + * computed from the current row.
+ */ + void reset_all_columns() { + for( std::size_t i { 0 }; i < col_coords.size(); i++ ) { + reset_column_coords( i ); + } + } + + /** + * @brief Increment the column according to the iteration order, thus resetting the column coordinates + * when the last possible column value for the current row has been reached. + * + * @return true if all the column coordinates have been reset, and thus also the row must be incremented + * @return false if one column coordinate was incremented instead, so the iterator stays on the current row + */ + bool increment_column() { + bool rewind; + typename array_t::size_type i { 0 }; + do { + typename array_t::value_type & col = col_coords[ i ]; + // must rewind dimension if the column offset is already at the max value + // or if the column coordinates are already at the max value + rewind = ( col == column_max_values[ i ] ); + if( rewind ) { + // col = this->row_coords[i] == 0 ? 0 : this->row_coords[i] - (halo); + reset_column_coords( i ); + } else { + ++col; + } + ++i; + } while( rewind && i < col_coords.size() ); + + // if we change z, then we also must reset x and y; if only y, we must reset x, and so on + return rewind; + } + }; + + // =============================================================== + + /** + * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. + * + * This class coarsens a finer system to a coarser system by projecting each input value (column), + * expressed in finer coordinates, to an output (row) value expressed in coarser coordinates. + * The coarser sizes are assumed to be row_generator#physical_sizes, while the finer sizes are here + * stored inside #finer_sizes. + * + * The corresponding refinement matrix is obtained by transposing the coarsening matrix.
+ * + * @tparam DIMS number of dimensions of the system + * @tparam T type of matrix values + */ + template< std::size_t DIMS, typename T = double > + struct coarsener_generator_iterator : public row_generator< DIMS > { + + using row_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; + using column_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; + using nonzero_value_type = T; + using array_t = typename row_generator< DIMS >::array_t; + using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T >; + + // the sizes to project from + const array_t finer_sizes; ///< the size of the finer system (columns) + array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be + //// incremented when incrementing the row coordinates; it is the ratio between + //// #finer_sizes and row_generator#physical_sizes + + /** + * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, + * setting its row at \p _current_row and the column at the corresponding value. + * + * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the + * construction throws std::invalid_argument.
+ * + * @param _coarser_sizes sizes of the coarser system (rows) + * @param _finer_sizes sizes of the finer system (columns) + * @param _current_row row (in the coarser system) to set the iterator on + */ + coarsener_generator_iterator( const array_t & _coarser_sizes, const array_t & _finer_sizes, row_coordinate_type _current_row ) : + row_generator< DIMS >( _coarser_sizes, _current_row ), finer_sizes( _finer_sizes ), steps( { 0 } ) { + for( std::size_t i { 0 }; i < DIMS; i++ ) { + // finer size MUST be an exact multiple of coarser_size; NOTE(review): the integer divisions below let some non-multiples through (finer 9, coarser 4 gives step 2 and 9 / 2 == 4, so no throw) - confirm whether a remainder check is intended + typename array_t::value_type step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; + if( step == 0 || finer_sizes[ i ] / step != this->physical_sizes[ i ] ) { + throw std::invalid_argument( std::string( "finer size " + "of " + "dimension" + " " ) + + std::to_string( i ) + + std::string( "is not an exact multiple of coarser " + "size" ) ); + } + steps[ i ] = step; + } + current_values.first.first = _current_row; + current_values.first.second = coords_to_finer_col(); + current_values.second = v(); + } + + coarsener_generator_iterator( const coarsener_generator_iterator & o ) = default; + + coarsener_generator_iterator( coarsener_generator_iterator && o ) = default; + + /** + * @brief Increments the row and the column according to the respective physical sizes, + * thus iterating onto the coarsening matrix coordinates. + * + * @return \code *this \endcode, i.e. the same object with the updated row and column + */ + coarsener_generator_iterator< DIMS, T > & operator++() { + this->increment_row(); + current_values.first.first = this->coords_to_rowcol( this->row_coords ); + current_values.first.second = coords_to_finer_col(); + current_values.second = v(); + return *this; + } + + /** + * @brief Returns whether \c this and \p o differ.
+ */ + bool operator!=( const coarsener_generator_iterator< DIMS, T > & o ) const { + if( this->i() != o.i() ) { + return true; + } + return this->j() != o.j(); + } + + /** + * @brief Returns whether \c this and \p o are equal. + */ + bool operator==( const coarsener_generator_iterator< DIMS, T > & o ) const { + return this->i() == o.i() && this->j() == o.j(); + } + + /** + * @brief Operator returning the triple to directly access row, column and element values. + * + * Useful when building the matrix by copying the triple of coordinates and value, + * like for the BSP1D backend. + */ + const value_type & operator*() const { + return current_values; + } + + /** + * @brief Returns the current row, according to the coarser system. + */ + inline row_coordinate_type i() const { + return current_values.first.first; + } + + /** + * @brief Returns the current column, according to the finer system. + */ + inline column_coordinate_type j() const { + return current_values.first.second; + } + + /** + * @brief Returns always 1, as the coarsening keeps the same value. + */ + inline nonzero_value_type v() const { + return static_cast< nonzero_value_type >( 1 ); + } + + private: + value_type current_values; ///< triple storing the current value for row, column and matrix element + + /** + * @brief Returns the row coordinates converted to the finer system, to compute + * the column value.
+ */ + column_coordinate_type coords_to_finer_col() const { + column_coordinate_type row { 0 }; + column_coordinate_type s { 1 }; + for( typename array_t::size_type i { 0 }; i < this->row_coords.size(); i++ ) { + s *= steps[ i ]; + row += s * this->row_coords[ i ]; + s *= this->physical_sizes[ i ]; + } + return row; + } + }; + + } // namespace old + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 11adf82c1..959d21969 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -31,10 +31,16 @@ #include #include +#include #include "hpcg_data.hpp" #include "matrix_building_utils.hpp" +#ifndef MASTER_PRINT +#define INTERNAL_MASTER_PRINT +#define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } +#endif + namespace grb { namespace algorithms { @@ -92,17 +98,32 @@ namespace grb { // initialize the main (=uncoarsened) system matrix grb::RC rc { grb::SUCCESS }; + const size_t pid { spmd<>::pid() }; + grb::utils::Timer timer; + MASTER_PRINT( pid, "\n-- generating system matrix...\n" << std::endl ); + grb::spmd<>::barrier(); + timer.reset(); rc = build_ndims_system_matrix< DIMS, T >( data->A, params.physical_sys_sizes, params.halo_size, params.diag_value, params.non_diag_value ); + MASTER_PRINT( pid, "\n-- generating system matrix... time (ms) " << timer.time() << std::endl ); if( rc != grb::SUCCESS ) { - std::cerr << "Failure to generate the initial system (" << toString( rc ) << ") of size " << n << std::endl; + MASTER_PRINT( pid, "Failure to generate the initial system (" + << toString( rc ) << ") of size " << n << "\n" ); return rc; } - // set values of diagonal vector + // set values of vectors + MASTER_PRINT( pid, "-- populating vectors..."
); + timer.reset(); set( data->A_diagonal, params.diag_value ); + data->zero_temp_vectors(); + MASTER_PRINT( pid, " time (ms) " << timer.time() << std::endl ); + + MASTER_PRINT( pid, "-- generating color masks...\n" << std::endl ); + timer.reset(); build_static_color_masks( data->color_masks, n, params.num_colors ); + MASTER_PRINT( pid, "\n\n-- generating color masks... time (ms) " << timer.time() << std::endl ); // initialize coarsening with additional pointers and dimensions copies to iterate and divide grb::algorithms::multi_grid_data< T, T > ** coarser = &data->coarser_level; @@ -124,20 +145,45 @@ namespace grb { grb::algorithms::multi_grid_data< double, double > * new_coarser { new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ) }; // install coarser level immediately to cleanup in case of build error *coarser = new_coarser; + + MASTER_PRINT( pid, "-- level " << coarsening_level << "\n\tgenerating coarsening matrix...\n" ); + timer.reset(); // initialize coarsener matrix, system matrix and diagonal vector for the coarser level rc = build_ndims_coarsener_matrix< DIMS >( new_coarser->coarsening_matrix, coarser_sizes, previous_sizes ); if( rc != grb::SUCCESS ) { - std::cerr << "Failure to generate coarsening matrix (" << toString( rc ) << ")." 
<< std::endl; + MASTER_PRINT( pid, "Failure to generate coarsening matrix (" << toString( rc ) << ").\n" ); return rc; } + double coarsener_gen_time{ timer.time() }; + + MASTER_PRINT( pid, "\tgenerating system matrix...\n" ); + timer.reset(); rc = build_ndims_system_matrix< DIMS, T >( new_coarser->A, coarser_sizes, params.halo_size, params.diag_value, params.non_diag_value ); if( rc != grb::SUCCESS ) { - std::cerr << "Failure to generate system matrix (" << toString( rc ) << ")for size " << coarser_size << std::endl; + MASTER_PRINT( pid, "Failure to generate system matrix (" << toString( rc ) + << ") for size " << coarser_size << "\n" ); return rc; } + double coarse_sys_gen_time{ timer.time() }; + + MASTER_PRINT( pid, "\tpopulating vectors...\n" ); + timer.reset(); set( new_coarser->A_diagonal, params.diag_value ); + new_coarser->zero_temp_vectors(); + double coarser_vec_gen_time{ timer.time() }; + // build color masks for coarser level (same masks, but with coarser system size) + MASTER_PRINT( pid, "\tgenerating color masks..." << std::endl ); + timer.reset(); rc = build_static_color_masks( new_coarser->color_masks, coarser_size, params.num_colors ); + double coarse_masks_sys_time{ timer.time() }; + MASTER_PRINT( pid, "-- level " << coarsening_level << "... 
time (ms) for " + "[coarsening matrix,coarse system matrix,coarser vectors,color masks]:" + << coarsening_level << "," << coarsener_gen_time + << "," << coarse_sys_gen_time + << "," << coarser_vec_gen_time + << "," << coarse_masks_sys_time << std::endl; + ); // prepare for new iteration coarser = &new_coarser->coarser_level; @@ -152,4 +198,9 @@ namespace grb { } // namespace algorithms } // namespace grb +#ifdef INTERNAL_MASTER_PRINT +#undef INTERNAL_MASTER_PRINT +#undef MASTER_PRINT +#endif + #endif // _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS diff --git a/include/graphblas/utils/geometry/array_vector_storage.hpp b/include/graphblas/utils/geometry/array_vector_storage.hpp new file mode 100644 index 000000000..451364754 --- /dev/null +++ b/include/graphblas/utils/geometry/array_vector_storage.hpp @@ -0,0 +1,67 @@ + +#ifndef _ARRAY_VECTOR_STORAGE_H_ +#define _ARRAY_VECTOR_STORAGE_H_ + +#include +#include +#include + +namespace grb { + namespace utils { + namespace geometry { + +template< typename T, std::size_t DIMS > class array_vector_storage: public std::array< T, DIMS > { + +public: + + using vector_storage = std::array< T, DIMS >&; + using const_vector_storage = const std::array< T, DIMS >&; + + array_vector_storage( std::size_t _dimensions ) { + static_assert( DIMS > 0, "cannot allocate 0-sized array" ); + if( _dimensions != DIMS ) { + throw std::invalid_argument("given dimensions must match the type dimensions"); + } + } + + array_vector_storage() = delete; + + // only copy constructor/assignment, since there's no external storage + array_vector_storage( const array_vector_storage< T, DIMS >& o ) noexcept { + std::copy_n( o.cbegin(), DIMS, this->begin() ); + } + + /* + array_vector_storage( array_vector_storage< T >&& o ) { + std::copy_n( o._storage.cbegin(), DIMS, this->_storage.cbegin() ); + } + */ + + array_vector_storage< T, DIMS >& operator=( const array_vector_storage< T, DIMS > &original ) noexcept { + std::copy_n( original.begin(), DIMS, this->begin() 
); + return *this; + } + + //array_vector_storage< T, DIMS >& operator=( array_vector_storage< T, DIMS > &&original ) = delete; + + ~array_vector_storage() {} + + constexpr std::size_t dimensions() const { + return DIMS; + } + + inline vector_storage storage() { + return *this; + } + + inline const_vector_storage storage() const { + return *this; + } + +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _ARRAY_VECTOR_STORAGE_H_ diff --git a/include/graphblas/utils/geometry/generic_vector_storage.hpp b/include/graphblas/utils/geometry/generic_vector_storage.hpp new file mode 100644 index 000000000..166dad3b8 --- /dev/null +++ b/include/graphblas/utils/geometry/generic_vector_storage.hpp @@ -0,0 +1,117 @@ + +#ifndef _GENERIC_VECTOR_STORAGE_H_ +#define _GENERIC_VECTOR_STORAGE_H_ + +#include +#include + +namespace grb { + namespace utils { + namespace geometry { + +/** Array of T whose length is chosen at run time; the buffer is allocated with new[] on construction and released with delete[] on destruction. Copy construction and copy assignment are provided, move construction and move assignment are deleted. */ template< typename T > class generic_vector_storage { + + std::size_t _dimensions; + T* _storage; + + /** Releases the owned buffer if the pointer is non-null; the pointer itself is not reset. */ void clean() { + if( this->_storage != nullptr ) { + delete[] this->_storage; + } + } + +public: + + using reference = T&; + using const_reference = const T&; + using iterator = T*; + using const_iterator = const T*; + using pointer = T*; + using const_pointer = const T*; + using vector_storage = T*; + /* NOTE(review): const_vector_storage is T* rather than const T*, so storage() const returns a pointer to mutable elements - confirm this is intended */ using const_vector_storage = T*; + + /** Allocates __dimensions elements with new[]; throws std::invalid_argument if __dimensions is 0. */ generic_vector_storage( std::size_t __dimensions ): + _dimensions( __dimensions ) { + if( __dimensions == 0 ) { + throw std::invalid_argument("dimensions cannot be 0"); + } + this->_storage = new T[ __dimensions ]; + } + + generic_vector_storage() = delete; + + /** Deep copy: allocates a buffer of the same length as o and copies its elements. */ generic_vector_storage( const generic_vector_storage< T >& o ): + _dimensions( o._dimensions ), _storage( new T[ o._dimensions ] ) { + std::copy_n( o._storage, o._dimensions, this->_storage ); + } + + generic_vector_storage( generic_vector_storage< T >&& o ) = delete; + + /** Copy assignment: reallocates only when the two lengths differ, then copies the elements of original. */ generic_vector_storage< T >& operator=( const generic_vector_storage< T > &original ) { + if( 
original._dimensions != this->_dimensions ) { + /* NOTE(review): the old buffer is released before the new allocation; if new[] throws, _storage keeps the released pointer and the destructor would delete it again - confirm whether allocate-then-swap is wanted */ this->clean(); + this->_storage = new T[ original._dimensions]; + } + this->_dimensions = original._dimensions; + std::copy_n( original._storage, original._dimensions, this->_storage ); + return *this; + } + + generic_vector_storage< T >& operator=( generic_vector_storage< T > &&original ) = delete; + + /** Releases the owned buffer. */ ~generic_vector_storage() { + this->clean(); + } + + /** Returns the number of stored elements. */ std::size_t dimensions() const { + return this->_dimensions; + } + + /** Iterators are raw pointers: begin variants point at the first element, end variants one past the last. */ inline iterator begin() { + return this->_storage; + } + + inline iterator end() { + return this->_storage + this->_dimensions; + } + + inline const_iterator begin() const { + return this->_storage; + } + + inline const_iterator end() const { + return this->_storage + this->_dimensions; + } + + inline const_iterator cbegin() const { + return this->_storage; + } + + inline const_iterator cend() const { + return this->_storage + this->_dimensions; + } + + /** Returns the raw pointer to the first element. */ inline vector_storage storage() { + return this->_storage; + } + + inline const_vector_storage storage() const { + return this->_storage; + } + + /** Element access by offset from the buffer start; pos is not range-checked. */ inline reference operator[]( std::size_t pos ) { + return *( this->_storage + pos); + } + + inline const_reference operator[]( std::size_t pos ) const { + return *( this->_storage + pos ); + } + +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _GENERIC_VECTOR_STORAGE_H_ diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp new file mode 100644 index 000000000..4d7fd62ce --- /dev/null +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp @@ -0,0 +1,232 @@ + +#ifndef _LINEARIZED_HALO_NDIM_GEOMETRY_H_ +#define _LINEARIZED_HALO_NDIM_GEOMETRY_H_ + +#include +#include +#include +#include +#include +#include + +#include "linearized_ndim_system.hpp" +#include "array_vector_storage.hpp" +#include "generic_vector_storage.hpp" +#include "ndim_vector.hpp" + +namespace 
grb { + namespace utils { + namespace geometry { + +/** For every dimension i, writes into neighbors_start[i] the first neighbor coordinate (system_coordinates[i] - halo, clamped at 0) and into neighbors_range[i] the count of neighbor coordinates up to min(system_coordinates[i] + halo, _system_sizes[i] - 1), both ends included. */ template< typename CoordT, std::size_t DIMS > void __compute_neighbors_range( + const array_vector_storage< CoordT, DIMS >& _system_sizes, + const CoordT halo, + const array_vector_storage< CoordT, DIMS >& system_coordinates, + array_vector_storage< CoordT, DIMS >& neighbors_start, + array_vector_storage< CoordT, DIMS >& neighbors_range ) { + + for( CoordT i{0}; i < DIMS/* - 1*/; i++ ) { + const CoordT start{ system_coordinates[i] <= halo ? 0 : system_coordinates[i] - halo }; + const CoordT end{ std::min( system_coordinates[i] + halo, _system_sizes[i] - 1 ) }; + neighbors_start[i] = start; + neighbors_range[i] = end - start + 1; + } + /* + const std::size_t last{ DIMS - 1 }; + const CoordT start{ system_coordinates[ last ] <= halo ? 0 : system_coordinates[ last ] - halo }; + const CoordT end{ system_coordinates[ last ] + halo }; // can extend beyond actual DIMS-dimensional space + neighbors_start[ last ] = start; + neighbors_range[ last ] = end - start + 1; + */ +} + + + + + + +/** Walks the dimensions from the last to the first; for each one it writes one coordinate into result and subtracts from neighbor the count of neighbors skipped, using the per-dimension tables in dimension_neighbors. Returns the value of neighbor left after the first dimension has been processed. Throws std::invalid_argument when neighbor is greater than system_size. */ template< typename CoordT, std::size_t DIMS > std::size_t __neighbour_to_system_coords( + const std::array< CoordT, DIMS > & sizes, + std::size_t system_size, + const std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > > & dimension_neighbors, + CoordT halo, + CoordT neighbor, + array_vector_storage< CoordT, DIMS > & result) { + + /* NOTE(review): the check below rejects only neighbor greater than system_size, while the message text reads as a greater-or-equal bound; confirm which of the two is intended */ if( neighbor > system_size ) { + throw std::invalid_argument("neighbor number ( " + std::to_string(neighbor) + + " ) >= system size ( " + std::to_string( system_size ) + " )"); + } + + array_vector_storage< CoordT, DIMS > halo_coords( DIMS ); +#ifdef DBG + std::size_t * const halo_coords_end{ halo_coords.data() + DIMS }; +#endif + std::fill_n( halo_coords.begin(), DIMS, 0 ); + + for( std::size_t _dim{DIMS}; _dim > 0; _dim--) { + + const std::size_t dimension{_dim - 1}; + const std::size_t dimension_size{ sizes[dimension] }; + const ndim_vector< CoordT, CoordT, generic_vector_storage< 
CoordT > > & neighbors{ dimension_neighbors[dimension] }; + + CoordT * const halo_coords_begin{ halo_coords.data() + dimension }; + +#ifdef DBG + std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " << neighbor << std::endl; + std::cout << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; +#endif + + std::size_t h{0}; + std::size_t previous_neighs{ 0 }; + *halo_coords_begin = h; + std::size_t halo_max_neighs{ neighbors.at( halo_coords_begin ) }; + //std::cout << "\tinitial halo_max_neighs " << halo_max_neighs << std::endl; + while( h < halo && neighbor >= previous_neighs + halo_max_neighs ) { + h++; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } +#ifdef DBG + std::cout << "- initial halo - neighbour " << neighbor << std::endl; + std::cout << "\th " << h << std::endl; + std::cout << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + + + if ( h < halo ){ + result[dimension] = h; + neighbor -= previous_neighs; +#ifdef DBG + std::cout << "end neighbour " << neighbor << std::endl; +#endif + continue; + } + // saturation occurred + const std::size_t distance_from_halo{ ( neighbor - previous_neighs ) / halo_max_neighs }; +#ifdef DBG + std::cout << "- before middle elements - neighbour " << neighbor << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; + std::cout << "\tdistance_from_halo " << distance_from_halo << std::endl; + std::cout << "\tdimension_size " << dimension_size << std::endl; +#endif + if ( distance_from_halo < dimension_size - 2 * halo ) { + result[dimension] = distance_from_halo + halo; + neighbor -= (previous_neighs + distance_from_halo * halo_max_neighs) ; +#ifdef DBG + std::cout << "end neighbour " << neighbor << 
std::endl; +#endif + continue; + } + previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; +#ifdef DBG + std::cout << "- after middle elements -neighbour " << neighbor << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + + h = halo - 1; + *halo_coords_begin = h; + halo_max_neighs = neighbors.at( halo_coords_begin ); + while( h > 0 && neighbor >= previous_neighs + halo_max_neighs ) { + h--; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } + neighbor -= previous_neighs; +#ifdef DBG + std::cout << "- final halo - neighbour " << neighbor << std::endl; + std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; +#endif + // ( dimension_size - 1 ) because coordinates are 0-based and neighbor + // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] + result[dimension] = dimension_size - 1 - h; +#ifdef DBG + std::cout << "end neighbour " << neighbor << std::endl; +#endif + } + + return neighbor; +} + + +/** Sums neighbor counts along one dimension of local_size elements: for each h below halo, while more than one element is left, adds twice prev_neighs at coordinate h and removes two elements; the elements left over are then counted with prev_neighs at the final h. The value of h is written through coords_buffer before each lookup. */ template< typename CoordT > std::size_t __accumulate_dimension_neighbours( + const ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >& prev_neighs, + CoordT* coords_buffer, + std::size_t halo, + std::size_t local_size ) { + std::size_t neighs{0}; + std::size_t h{0}; + for( ; h < halo && local_size > 1; h++ ) { + *coords_buffer = h; + + const std::size_t local_neighs{ prev_neighs.at( coords_buffer ) }; + neighs += 2 * local_neighs; // the 2 sides + local_size -= 2; + } + *coords_buffer = h; + neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements + return neighs; +} + +/** For every position in the domain of container, stores the product of (h + 1 + halo) over the components h of that position. */ template< typename CoordT > void __populate_halo_neighbors( std::size_t halo, + ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >& container ) { + + using it_type = typename ndim_vector< CoordT, CoordT, 
generic_vector_storage< CoordT > >::domain_iterator; + it_type end{ container.domain_end() }; + for( it_type it{ container.domain_begin() }; it != end; ++it ) { + std::size_t res{1}; + for( std::size_t h: it->get_position() ) res *= (h + 1 + halo); + container.at( it->get_position() ) = res; + } +} + +/** Appends DIMS tables to dimension_limits: the first is filled by __populate_halo_neighbors, each following one by accumulating the previous table along sizes[dimension - 1] with __accumulate_dimension_neighbours. Returns the accumulation of the last table along sizes.back(). */ template< typename CoordT, std::size_t DIMS > std::size_t __init_halo_search( + typename linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > >::const_vector_reference sizes, + std::size_t halo, + std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > >& dimension_limits ) { + + using nd_vec = ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >; + using nd_vec_iterator = typename nd_vec::domain_iterator; + + std::vector halo_sizes( DIMS, halo + 1); + dimension_limits.emplace_back(halo_sizes); + + // initialize values + __populate_halo_neighbors< CoordT >( halo, dimension_limits[0] ); + for( std::size_t i{1}; i < DIMS; i++ ) { + std::vector halos( DIMS - i, halo + 1 ); + dimension_limits.emplace_back(halos); + } + + std::array< CoordT, DIMS > prev_coords_buffer; // store at most DIMS values + CoordT* const prev_coords{ prev_coords_buffer.data() }; + CoordT* const second{ prev_coords + 1 }; // store previous coordinates from second position + for( std::size_t dimension{1}; dimension < DIMS; dimension++ ) { + const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; + nd_vec& current_neighs{dimension_limits[dimension]}; + + nd_vec_iterator end{ current_neighs.domain_end() }; + for( nd_vec_iterator it{ current_neighs.domain_begin() }; it != end; ++it ) { + typename nd_vec::const_domain_vector_reference current_halo_coords{ it->get_position() }; + + std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); + std::size_t local_size{ sizes[dimension - 1] }; + const std::size_t neighs{ __accumulate_dimension_neighbours(prev_neighs, prev_coords, halo, local_size) }; + current_neighs.at(current_halo_coords) 
= neighs; + } + } + return __accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); +} + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _LINEARIZED_HALO_NDIM_GEOMETRY_H_ diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp new file mode 100644 index 000000000..ede3af52c --- /dev/null +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp @@ -0,0 +1,377 @@ + +#ifndef _LINEARIZED_HALO_NDIM_ITERATOR_H_ +#define _LINEARIZED_HALO_NDIM_ITERATOR_H_ + +#include +#include +#include +#include +#include + +#include "linearized_ndim_system.hpp" +#include "array_vector_storage.hpp" +#include "linearized_ndim_iterator.hpp" + +namespace grb { + namespace utils { + namespace geometry { + +// forward declaration +template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_system; + +template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_iterator { + + using system_t = linearized_halo_ndim_system< CoordT, DIMS >; + using vector_t = array_vector_storage< CoordT, DIMS >; + using vector_iter = linearized_ndim_iterator< CoordT, vector_t >; +public: + + //using vector_t = typename vector_iter::vector_t; + using const_vector_reference = typename vector_iter::const_vector_reference; + + + + struct halo_ndim_point { + private: + + // for linearization + const system_t* _system; + + // for iteration + vector_iter _element_iter; // coordinates iterator + + //vector_t* _element; + //std::size_t _coordinates_linear; + vector_t _neighbor; //the actual neighbor + //std::size_t _neighbor_linear; + CoordT _position; + + public: + + friend linearized_halo_ndim_iterator< CoordT, DIMS>; + + halo_ndim_point() = delete; + + halo_ndim_point( const halo_ndim_point& ) = default; + + halo_ndim_point( halo_ndim_point&& ) = delete; + + halo_ndim_point( const system_t& system ) noexcept : + 
_system( &system ), + _element_iter( system ), + _neighbor( DIMS ), + _position( 0 ) + { + std::fill_n( this->_neighbor.begin(), DIMS, 0 ); + } + + halo_ndim_point& operator=( const halo_ndim_point& ) = default; + + //halo_ndim_point& operator=( halo_ndim_point&& ) = delete; + + const_vector_reference get_element() const { + return this->_element_iter->get_position(); + } + + std::size_t get_element_linear() const { + return this->_system->ndim_to_linear( this->_element_iter->get_position() ); + } + + const_vector_reference get_neighbor() const { + return this->_neighbor; + } + + std::size_t get_neighbor_linear() const { + return this->_system->ndim_to_linear( this->_neighbor ); + } + + CoordT get_position() const { + return this->_position; + } + }; + + + + + + + using const_point_reference = const struct halo_ndim_point&; + using const_point_pointer = const struct halo_ndim_point*; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = halo_ndim_point; + using pointer = const halo_ndim_point*; + using reference = const halo_ndim_point&; + using difference_type = signed long; + +private: + + halo_ndim_point _point; + linearized_ndim_system< CoordT, vector_t > _neighbors_linearizer; + vector_iter _neighbor_iter; // iterator in the sub-space of neighbors (0-based) + vector_t _neighbors_start; + vector_iter _neighbor_end; + + inline void __update_neighbor() { + for( std::size_t i{0}; i < DIMS; i++ ) { + //(this->_point)._neighbor[i] = this->_neighbors_start[i] + (*(this->_neighbor_iter))[i]; + this->_point._neighbor[i] = this->_neighbors_start[i] + this->_neighbor_iter->get_position()[i]; + } + } + + /* + void __update_neighbor_linear() { + (this->_point)._neighbor_linear = + this->_system.ndim_to_linear( this->_point._neighbor ); + } + */ + + inline void on_neighbor_iter_update() { + this->__update_neighbor(); + //this->__update_neighbor_linear(); + } + + /* + void 
__update_coordinates_linear() { + (this->_point)._coordinates_linear = + this->_system.ndim_to_linear( *this->_element_iter ); + } + */ + + void on_element_update() { + //this->__update_coordinates_linear(); + // reset everything + vector_t neighbors_range( DIMS ); + this->_point._system->compute_neighbors_range( + //*(this->_point._element_iter), + this->_point._element_iter->get_position(), + this->_neighbors_start, + neighbors_range + ); + /* + std::cout << "\t=== start "; + print( this->_neighbors_start ) << " range "; + print( neighbors_range ) << std::endl; + */ + // re-target _neighbors_linearizer + this->_neighbors_linearizer.retarget( neighbors_range ); + } + + void on_element_advance() { + this->on_element_update(); + + this->_neighbor_iter = vector_iter( this->_neighbors_linearizer ); + this->_neighbor_end = vector_iter::make_system_end_iterator( this->_neighbors_linearizer ); + + this->on_neighbor_iter_update(); + } + +public: + + linearized_halo_ndim_iterator() = delete; + + linearized_halo_ndim_iterator( const system_t& system ) noexcept : + _point( system ), + _neighbors_linearizer( DIMS, system.halo() + 1 ), + _neighbor_iter( this->_neighbors_linearizer ), + _neighbors_start( DIMS ), + _neighbor_end( vector_iter::make_system_end_iterator( this->_neighbors_linearizer ) ) + { + std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); + } + + + /* + linearized_halo_ndim_iterator( const linearized_halo_ndim_iterator< CoordT, DIMS >& original ) noexcept: + _coordinates_linearizer( original._coordinates_linearizer ), + _halo( original._halo ), + _dimension_limits( original._dimension_limits ), + _neighbors_linearizer( original._neighbors_linearizer ), + _element_iter( original._element_iter ), + _neighbor_iter( original._neighbor_iter ), + _neighbor_end( original._neighbor_end ), + _neighbors_start( original._neighbors_start ), + _point( original._point ) {} + */ + + linearized_halo_ndim_iterator( const linearized_halo_ndim_iterator< CoordT, DIMS >& ) = 
default; + + //linearized_halo_ndim_iterator( linearized_halo_ndim_iterator< CoordT, DIMS >&& original ) = delete; + + /* + linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( + const linearized_halo_ndim_iterator< CoordT, DIMS >& original ) noexcept { + this->_coordinates_linearizer = original._coordinates_linearizer; + this->_halo = original._halo; + this->_dimension_limits = original._dimension_limits; + this->_neighbors_linearizer = original._neighbors_linearizer; + this->_element_iter = original._element_iter; + this->_coordinates_linear = original._coordinates_linear; + this->_neighbor_iter = original._neighbor_iter; + this->_neighbor_end = original._neighbor_end; + this->_neighbor = original._neighbor; + this->_neighbors_start = original._neighbors_start; + this->_neighbor_linear = original._neighbor_linear; + } + */ + + linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( const linearized_halo_ndim_iterator< CoordT, DIMS >& ) = default; + + //linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( linearized_halo_ndim_iterator< CoordT, DIMS >&& ) = delete; + + bool operator!=( const linearized_halo_ndim_iterator< CoordT, DIMS >& other ) const { + //return (this->_point)._coordinates_linear != (other._point)._coordinates_linear + // || (this->_point)._neighbor_linear != (other._point)._neighbor_linear; + return this->_point._position != other._point._position; // use linear coordinate + } + + const_point_reference operator*() const { + return this->_point; + } + + const_point_pointer operator->() const { + return &(this->_point); + } + + bool has_more_neighbours() const { + return this->_neighbor_iter != this->_neighbor_end; + } + + void next_neighbour() { + /* + std::cout << "sizes: " << this->_neighbors_linearizer.get_sizes() + << " offset " << this->_neighbor_iter->get_position() << " -> " + << this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ) + << std::endl; + */ + ++(this->_neighbor_iter); + 
this->on_neighbor_iter_update(); + this->_point._position++; + } + + bool has_more_elements() const { + return this->_point.get_element_linear() != (this->_point._system)->base_system_size(); + } + + void next_element() { + std::size_t num_neighbours = this->_neighbors_linearizer.system_size(); + std::size_t neighbour_position_offset = + this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ); + // std::cout << " num_neighbours " << num_neighbours << " offset " << neighbour_position_offset << std::endl; + ++(this->_point._element_iter); + this->on_element_advance(); + // this->_point._position++; + this->_point._position -= neighbour_position_offset; + this->_point._position += num_neighbours; + } + + linearized_halo_ndim_iterator< CoordT, DIMS >& operator++() noexcept { + ++(this->_neighbor_iter); + if( !has_more_neighbours() ) { + ++(this->_point._element_iter); + //this->_coordinates_linear = this->_coordinates_linearizer.ndim_to_linear( this->_element_iter ); + this->on_element_advance(); + + } else { + this->on_neighbor_iter_update(); + } + this->_point._position++; + return *this; + } + + + + linearized_halo_ndim_iterator< CoordT, DIMS >& operator+=( std::size_t offset ) { + if( offset == 1UL ) { + return this->operator++(); + } + const std::size_t final_position { this->_point._position + offset }; + if( final_position > this->_point._system->halo_system_size() ) { + throw std::range_error( "neighbor linear value beyond system" ); + } + vector_t final_element( DIMS ); + std::size_t neighbor_index{ (this->_point._system->neighbour_linear_to_element( final_position, final_element )) }; + + // std::cout << "\t=== element " << offset << " -- "; + // std::cout << final_element[0] << " " << final_element[0] << std::endl; + + this->_point._element_iter = vector_iter( *this->_point._system, final_element.cbegin() ); + //this->_point._element = &( *this->_element_iter ); + this->_point._position = final_position; + + 
this->on_element_update(); + this->_neighbors_linearizer.linear_to_ndim( neighbor_index, final_element ); + + this->_neighbor_iter = vector_iter( this->_neighbors_linearizer, final_element.cbegin() ); + this->_neighbor_end = vector_iter::make_system_end_iterator( this->_neighbors_linearizer ); + this->on_neighbor_iter_update(); + + return *this; + } + + difference_type operator-( const linearized_halo_ndim_iterator< CoordT, DIMS >& other ) const { + /* + if( _point.get_position() < a_point.get_position() ) { + throw std::invalid_argument( "first iterator is in a lower position than second" ); + } + */ + std::size_t a_pos{ _point.get_position() }, b_pos{ other._point.get_position() }; + // std::cout << "diff " << a_pos << " - " << b_pos << std::endl; + std::size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; + using diff_t = typename linearized_halo_ndim_iterator< CoordT, DIMS >::difference_type; + + if( highest - lowest > static_cast< std::size_t >( + std::numeric_limits< diff_t >::max() ) ) { + throw std::invalid_argument( "iterators are too distant" ); + } + + return ( static_cast< diff_t >( a_pos - b_pos ) ); + } + + + + + // implementation depending on logic in operator++ + static linearized_halo_ndim_iterator< CoordT, DIMS > make_system_end_iterator( + const system_t& system + ) { + linearized_halo_ndim_iterator< CoordT, DIMS > result( system ); + + /* + std::cout << "result 0: element "; + print(result->get_element()) << " neighbor "; + print(result->get_neighbor()) << std::endl; + */ + + // go to the very first point outside of space + result._point._element_iter = vector_iter::make_system_end_iterator( system ); + /* + std::cout << "result 1: element "; + print(result->get_element()) << " neighbor "; + print(result->get_neighbor()) << std::endl; + */ + + result.on_element_advance(); + result._point._position = system.halo_system_size(); + //std::cout << "got sys size " << system.halo_system_size() << std::endl; + + return result; 
+ } + +}; + +/* +template< typename CoordT, std::size_t DIMS > linearized_halo_ndim_iterator< CoordT, DIMS > + operator+( const linearized_halo_ndim_iterator< CoordT, DIMS >& original, std::size_t increment ) { + linearized_halo_ndim_iterator< CoordT, DIMS > res( original ); + return ( res += increment ); +} +*/ + + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _LINEARIZED_HALO_NDIM_ITERATOR_H_ diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp new file mode 100644 index 000000000..f915492ac --- /dev/null +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp @@ -0,0 +1,111 @@ + +#ifndef _LINEARIZED_HALO_NDIM_SYSTEM_H_ +#define _LINEARIZED_HALO_NDIM_SYSTEM_H_ + +#include +#include +#include +#include + +#include "array_vector_storage.hpp" +#include "linearized_ndim_system.hpp" +#include "linearized_halo_ndim_geometry.hpp" +#include "linearized_halo_ndim_iterator.hpp" + +namespace grb { + namespace utils { + namespace geometry { + +// only with array_vector_storage +template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_system: + public linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > > { +public: + + using iterator = linearized_halo_ndim_iterator< CoordT, DIMS >; + using const_vector_reference = typename array_vector_storage< CoordT, DIMS >::const_vector_storage; + using self_t = linearized_halo_ndim_system< CoordT, DIMS >; + using base_t = linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > >; + + linearized_halo_ndim_system( const_vector_reference sizes, CoordT halo ): + base_t( sizes.cbegin(), sizes.cend() ), + _halo( halo ) { + + for( CoordT __size : sizes ) { + if ( __size < 2 * halo + 1 ) { + throw std::invalid_argument( + std::string( "the halo (" + std::to_string(halo) + + std::string( ") goes beyond a system size (" ) + + std::to_string( __size) + 
std::string( ")" ) ) ); + } + } + + this->_system_size = __init_halo_search< CoordT, DIMS >( + this->get_sizes(), + _halo, this->_dimension_limits ); + assert( this->_dimension_limits.size() == DIMS ); + } + + linearized_halo_ndim_system() = delete; + + /* NOTE(review): a copy would have to copy _dimension_limits, a std::vector of ndim_vector, whose copy constructor is deleted; presumably this fails to compile if a system is ever copied -- confirm */ linearized_halo_ndim_system( const self_t & ) = default; + + linearized_halo_ndim_system( self_t && ) = delete; + + ~linearized_halo_ndim_system() noexcept {} + + /* NOTE(review): _halo is a const data member, so this defaulted copy assignment is implicitly defined as deleted */ self_t & operator=( const self_t & ) = default; + + self_t & operator=( self_t && ) = delete; + + /* iterator over the halo system, built from this system */ iterator begin() const { + return iterator( *this ); + } + + /* end iterator, built by iterator::make_system_end_iterator */ iterator end() const { + return iterator::make_system_end_iterator( *this ); + } + + /* returns the size stored from __init_halo_search in the constructor */ std::size_t halo_system_size() const { + return this->_system_size; + } + + /* returns system_size() of the base_t linearized system */ std::size_t base_system_size() const { + return this->base_t::system_size(); + } + + /* returns the halo value given at construction */ std::size_t halo() const { + return this->_halo; + } + + /* forwards to __compute_neighbors_range with this system's sizes and halo; writes neighbors_start and neighbors_range */ void compute_neighbors_range( + const array_vector_storage< CoordT, DIMS >& system_coordinates, + array_vector_storage< CoordT, DIMS >& neighbors_start, + array_vector_storage< CoordT, DIMS >& neighbors_range) const noexcept { + __compute_neighbors_range( this->get_sizes(), + this->_halo, + system_coordinates, + neighbors_start, + neighbors_range + ); + } + + /* forwards to __neighbour_to_system_coords; writes into result and returns that helper's value */ std::size_t neighbour_linear_to_element ( + CoordT neighbor, + array_vector_storage< CoordT, DIMS > & result) const noexcept { + return __neighbour_to_system_coords( this->get_sizes(), + this->_system_size, this->_dimension_limits, this->_halo, neighbor, result ); + } + +private: + + const CoordT _halo; + std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > > _dimension_limits; + std::size_t _system_size; + +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _LINEARIZED_HALO_NDIM_SYSTEM_H_ diff --git a/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp b/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp new file mode 100644 index 000000000..20a6473cc --- /dev/null
+++ b/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp @@ -0,0 +1,178 @@ + +#ifndef _NDIM_ITERATOR_H_ +#define _NDIM_ITERATOR_H_ + +#include +#include +#include +#include +#include + +#include "array_vector_storage.hpp" + + +namespace grb { + namespace utils { + namespace geometry { + +// forward declaration for default +template< typename T, typename StorageT > class linearized_ndim_system; + +template< typename T, typename StorageT > class linearized_ndim_iterator { +public: + + using storage_t = StorageT; + using lin_t = linearized_ndim_system< T, storage_t >; + using const_vector_reference = const storage_t&; + using self_t = linearized_ndim_iterator< T, StorageT >; + + struct ndim_point { + private: + + const lin_t* system; // pointer because of copy assignment + storage_t coords; + + public: + + friend self_t; + + ndim_point() = delete; + + ndim_point( const ndim_point& ) = default; + + ndim_point( ndim_point&& ) = delete; + + ndim_point( const lin_t& _system ) noexcept : + system( &_system ), + coords( _system.dimensions() ) + { + std::fill_n( this->coords.begin(), _system.dimensions(), 0 ); + } + + ndim_point& operator=( const ndim_point& ) = default; + + inline const_vector_reference get_position() const { + return coords; + } + + std::size_t get_linear_position() const { + return system->ndim_to_linear( coords ); + } + }; + + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = ndim_point; + using pointer = const value_type*; + using reference = const value_type&; + using difference_type = signed long; + + linearized_ndim_iterator( const lin_t &_system ) noexcept : + _p( _system ) + {} + + template< typename IterT > linearized_ndim_iterator( const lin_t &_system, IterT begin ) noexcept : + _p( _system ) + { + std::copy_n( begin, _system.dimensions(), this->_p.coords.begin() ); + } + + linearized_ndim_iterator() = delete; + + linearized_ndim_iterator( const self_t& 
original ): + _p( original._p ) {} + + self_t& operator=( const self_t& original ) = default; + + //linearized_ndim_iterator( self_t&& original ) = delete; + + //self_t operator=( self_t&& ) = delete; + + ~linearized_ndim_iterator() {} + + self_t & operator++() noexcept { + bool rewind{ true }; + // rewind only the first N-1 coordinates + for( std::size_t i { 0 }; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { + T& coord = this->_p.coords[ i ]; + // must rewind dimension if we wrap-around + /* + T new_coord = ( coord + 1 ) % this->_p.system->get_sizes()[ i ]; + rewind = new_coord < coord; + coord = new_coord; + */ + T plus = coord + 1; + rewind = plus >= this->_p.system->get_sizes()[ i ]; + coord = rewind ? 0 : plus; + } + // if we still have to rewind, increment the last coordinate, which is unbounded + if( rewind ) { + this->_p.coords[ this->_p.system->dimensions() - 1 ]++; + } + return *this; + } + + self_t & operator+=( std::size_t offset ) { + std::size_t linear{ _p.get_linear_position() + offset }; + if( linear > _p.system->system_size() ) { + throw std::invalid_argument("increment is too large"); + } + _p.system->linear_to_ndim( linear, _p.coords ); + return *this; + } + + difference_type operator-( const self_t &other ) const { + std::size_t a_pos{ _p.get_linear_position() }, + b_pos{ other._p.get_linear_position() }; + std::size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; + + if( highest - lowest > static_cast< std::size_t >( + std::numeric_limits< difference_type >::max() ) ) { + throw std::invalid_argument( "iterators are too distant" ); + } + + return ( static_cast< difference_type >( a_pos - b_pos ) ); + } + + reference operator*() const { + return this->_p; + } + + pointer operator->() const { + return &( this->_p ); + } + + bool operator!=( const self_t &o ) const { + const std::size_t dims{ this->_p.system->dimensions() }; + if( dims != o._p.system->dimensions() ) { + throw std::invalid_argument("system sizes 
do not match"); + } + bool equal{ true }; + for( std::size_t i{0}; i < dims && equal; i++) { + equal &= ( this->_p.coords[i] == o._p.coords[i] ); + } + return !equal; + } + + // implementation depending on logic in operator++ + static self_t + make_system_end_iterator( const lin_t &_system ) { + // fill with 0s + self_t iter( _system ); + std::size_t last{ iter->system->dimensions() - 1 }; + // store last size in last position + iter._p.coords[ last ] = iter->system->get_sizes()[ last ]; + return iter; + } + +private: + ndim_point _p; + +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _NDIM_ITERATOR_H_ diff --git a/include/graphblas/utils/geometry/linearized_ndim_system.hpp b/include/graphblas/utils/geometry/linearized_ndim_system.hpp new file mode 100644 index 000000000..2916208ed --- /dev/null +++ b/include/graphblas/utils/geometry/linearized_ndim_system.hpp @@ -0,0 +1,174 @@ + +#ifndef _NDIM_SYSTEM_LINEARIZER_H_ +#define _NDIM_SYSTEM_LINEARIZER_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "ndim_system.hpp" +#include "linearized_ndim_iterator.hpp" +#include "array_vector_storage.hpp" + + +namespace grb { + namespace utils { + namespace geometry { + +template< typename IterIn, typename IterOut > + std::size_t __compute_offsets( IterIn in_begin, IterIn in_end, IterOut out_begin ) { + std::size_t prod{1}; + for( ; in_begin != in_end; ++in_begin, ++out_begin ) { + *out_begin = prod; + prod *= *in_begin; + } + return prod; +} + +// container for system sizes, doing only ndim <--> linear translation +template< typename T, typename StorageT > class linearized_ndim_system: + public ndim_system< T, StorageT > { +public: + + using base_t = ndim_system< T, StorageT >; + using storage_t = StorageT; + using self_t = linearized_ndim_system< T, StorageT >; + + using vector_reference = typename base_t::vector_reference; + using const_vector_reference = typename base_t::const_vector_reference; + 
using vector_storage = typename storage_t::vector_storage; + using const_vector_storage = typename storage_t::const_vector_storage; + using iterator = linearized_ndim_iterator< T, storage_t >; + + template< typename IterT > linearized_ndim_system( IterT begin, IterT end) noexcept : + base_t( begin, end ), + offsets( std::distance( begin, end ) ) + { + this->_system_size = __compute_offsets( begin, end, this->offsets.begin() ) ; + } + + linearized_ndim_system() = delete; + + linearized_ndim_system( const self_t &original ) = default; + + + linearized_ndim_system( self_t &&original ) noexcept: + base_t( std::move(original) ), offsets( std::move( original.offsets ) ), + _system_size( original._system_size ) { + original._system_size = 0; + } + + linearized_ndim_system( const std::vector & _sizes ) noexcept : + linearized_ndim_system( _sizes.cbegin(), _sizes.cend() ) {} + + linearized_ndim_system( std::size_t _dimensions, std::size_t max_value ) noexcept : + base_t( _dimensions, max_value ), + offsets( _dimensions ), + _system_size( _dimensions ) + { + T v{1}; + for( std::size_t i{0}; i < _dimensions; i++ ) { + this->offsets[i] = v; + v *= max_value; + } + this->_system_size = v; + } + + ~linearized_ndim_system() {} + + self_t& operator=( const self_t & ) = default; + + //linearized_ndim_system& operator=( linearized_ndim_system &&original ) = delete; + + inline std::size_t system_size() const { + return this->_system_size; + } + + inline const_vector_reference get_offsets() const { + return this->offsets; + } + + void linear_to_ndim(std::size_t linear, vector_reference output ) const { + if( linear > this->_system_size ) { + throw std::range_error( "linear value beyond system" ); + } + for( std::size_t _i{ this->offsets.dimensions() }; _i > 0; _i-- ) { + const std::size_t dim{ _i - 1 }; + const std::size_t coord{ linear / this->offsets[dim] }; + output[dim] = coord; + linear -= ( coord * this->offsets[dim] ); + } + assert( linear == 0 ); + } + + std::size_t 
ndim_to_linear_check( const_vector_reference ndim_vector) const { + return this->ndim_to_linear_check( ndim_vector.storage() ); + } + + std::size_t ndim_to_linear_check( const_vector_storage ndim_vector ) const { + std::size_t linear { 0 }; + for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { + if( ndim_vector[i] >= this->get_sizes()[i] ) { + throw std::invalid_argument( "input vector beyond system sizes" ); + } + } + return ndim_to_linear( ndim_vector ); + } + + std::size_t ndim_to_linear( const_vector_reference ndim_vector) const { + return this->ndim_to_linear( ndim_vector.storage() ); + } + + std::size_t ndim_to_linear( const_vector_storage ndim_vector ) const { + std::size_t linear { 0 }; + for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { + linear += this->offsets[i] * ndim_vector[i]; + } + return linear; + } + + std::size_t ndim_to_linear_offset( const_vector_storage ndim_vector ) const { + std::size_t linear { 0 }; + std::size_t steps{ 1 }; + for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { + linear += steps * ndim_vector[i]; + steps *= this->_sizes[i]; + } + return linear; + } + + // must be same dimensionality + void retarget( const_vector_reference _new_sizes ) { + if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { + throw std::invalid_argument("new system must have same dimensions as previous: new " + + std::to_string( _new_sizes.dimensions() ) + ", old " + + std::to_string( this->_sizes.dimensions() ) ); + } + this->_sizes = _new_sizes; // copy + this->_system_size = __compute_offsets( _new_sizes.begin(), _new_sizes.end(), this->offsets.begin() ) ; + } + + iterator begin() const { + return iterator( *this ); + } + + iterator end() const { + return iterator::make_system_end_iterator( *this ); + } + +private: + storage_t offsets; + std::size_t _system_size; + +}; + + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _NDIM_SYSTEM_LINEARIZER_H_ diff --git 
a/include/graphblas/utils/geometry/ndim_system.hpp b/include/graphblas/utils/geometry/ndim_system.hpp new file mode 100644 index 000000000..41434f3c4 --- /dev/null +++ b/include/graphblas/utils/geometry/ndim_system.hpp @@ -0,0 +1,69 @@ + +#ifndef _NDIM_SYSTEM_H_ +#define _NDIM_SYSTEM_H_ + +#include +#include +#include +#include + +#include "array_vector_storage.hpp" + + +namespace grb { + namespace utils { + namespace geometry { + +template< typename T, typename StorageT > class ndim_system { + +public: + using storage_t = StorageT; + using vector_reference = storage_t&; + using const_vector_reference = const storage_t&; + using self_t = ndim_system< T, StorageT >; + + template< typename IterT > ndim_system( IterT begin, IterT end) noexcept : + _sizes( std::distance( begin, end ) ) + { + std::copy( begin, end, this->_sizes.begin() ); + } + + ndim_system() = delete; + + ndim_system( const self_t & ) = default; + + ndim_system( const std::vector & _sizes ) noexcept : + self_t( _sizes.cbegin(), _sizes.cend() ) {} + + ndim_system( std::size_t _dimensions, std::size_t max_value ) noexcept : + _sizes( _dimensions ) + { + std::fill_n( this->_sizes.begin(), _dimensions, max_value ); + } + + ndim_system( self_t &&original ) noexcept: _sizes( std::move( original._sizes ) ) {} + + ~ndim_system() {} + + self_t & operator=( const self_t &original ) = default; + + //self_t & operator=( self_t &&original ) = delete; + + inline std::size_t dimensions() const noexcept { + return _sizes.dimensions(); + } + + inline const_vector_reference get_sizes() const noexcept { + return this->_sizes; + } + +protected: + + storage_t _sizes; +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif diff --git a/include/graphblas/utils/geometry/ndim_vector.hpp b/include/graphblas/utils/geometry/ndim_vector.hpp new file mode 100644 index 000000000..9c9ad3b6a --- /dev/null +++ b/include/graphblas/utils/geometry/ndim_vector.hpp @@ -0,0 +1,122 @@ + +#ifndef _NDIM_VECTOR_H_ 
+#define _NDIM_VECTOR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include "linearized_ndim_system.hpp" + +namespace grb { + namespace utils { + namespace geometry { + +/* heap-allocated array of OutT addressed through a linearized_ndim_system; movable, not copyable */ template< typename OutT, typename CoordsT, typename StorageT > class ndim_vector { + +public: + + using const_domain_vector_reference = + typename linearized_ndim_system< CoordsT, StorageT >::const_vector_reference; + using domain_vector_storage = typename StorageT::const_vector_storage; + using domain_iterator = typename linearized_ndim_system< CoordsT, StorageT >::iterator; + +private: + + const linearized_ndim_system< CoordsT, StorageT > _linearizer; + OutT* data; + + /* linear index into data for the given coordinates */ inline std::size_t get_coordinate( domain_vector_storage coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + /* NOTE(review): forwards an iterator to ndim_to_linear(); confirm a matching overload exists, since this is only checked when the member is instantiated */ inline std::size_t get_coordinate( domain_iterator coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + /* releases the buffer allocated in the constructor; a moved-from object holds nullptr and owns nothing */ void clean_mem() { + if ( this->data != nullptr ) { + delete[] this->data; + } + } + +public: + + ndim_vector() = delete; + + /* allocates system_size() default-constructed elements for the sizes in [begin, end) */ template< typename IterT > ndim_vector( IterT begin, IterT end): _linearizer( begin, end ) { + static_assert( std::is_default_constructible< OutT >::value, + "the stored type is not default constructible" ); + this->data = new OutT[ _linearizer.system_size() ]; + } + + ndim_vector( const std::vector & _sizes ): + ndim_vector( _sizes.cbegin(), _sizes.cend() ) {} + + // ndim_vector( const ndim_vector< OutT, CoordsT, StorageT >& original ): + // _linearizer( original._linearizer ) { + // this->data = new std::size_t[ original.data_size() ]; + // std::copy_n( original.data, original.data_size(), this->data ); + // } + ndim_vector( const ndim_vector< OutT, CoordsT, StorageT >& original ) = delete; + + + /* takes over the buffer of original and leaves original with nullptr */ ndim_vector( ndim_vector< OutT, CoordsT, StorageT >&& original ) noexcept: + _linearizer( std::move( original._linearizer ) ) { + this->data = original.data; + original.data = nullptr; + } + // 
ndim_vector( ndim_vector< OutT, CoordsT, StorageT >&& original ) = delete; + + ndim_vector< OutT, CoordsT, StorageT >& operator=( + const ndim_vector< OutT, CoordsT, StorageT > &original ) = delete; + + ndim_vector< OutT, CoordsT, StorageT >& operator=( + ndim_vector< OutT, CoordsT, StorageT > &&original ) = delete; + + ~ndim_vector() { + this->clean_mem(); + } + + std::size_t dimensions() const { + return this->_linearizer.dimensions(); + } + + std::size_t data_size() const { + return this->_linearizer.system_size(); + } + + inline OutT& at( const_domain_vector_reference coordinates ) { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + inline const OutT& at( const_domain_vector_reference coordinates ) const { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + inline OutT& at( domain_vector_storage coordinates ) { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + inline const OutT& at( domain_vector_storage coordinates ) const { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + domain_iterator domain_begin() const { + return this->_linearizer.begin(); + } + + domain_iterator domain_end() const { + return this->_linearizer.end(); + } +}; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _NDIM_VECTOR_H_ diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 5b34d9895..8425432de 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -38,6 +38,12 @@ #include #include +#include + +#include + +// #define TEST_ITER + // here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests #ifdef HPCG_PRINT_STEPS @@ -111,6 +117,7 @@ struct simulation_input : public system_input { size_t smoother_steps; bool evaluation_run; bool no_preconditioning; + bool print_iter_stats; }; /** @@ -197,9 +204,9 @@ void print_norm( const grb::Vector< T > & r, const char * head, const Ring & rin */ void grbProgram( const 
simulation_input & in, struct output & out ) { // get user process ID - assert( spmd<>::pid() < spmd<>::nprocs() ); + const size_t pid { spmd<>::pid() }; + assert( pid < spmd<>::nprocs() ); grb::utils::Timer timer; - timer.reset(); // assume successful run out.error_code = SUCCESS; @@ -207,15 +214,24 @@ void grbProgram( const simulation_input & in, struct output & out ) { // wrap hpcg_data inside a unique_ptr to forget about cleaning chores std::unique_ptr< hpcg_data< double, double, double > > hpcg_state; + if( pid == 0 ) { + thcout << "beginning input generation..." << std::endl; + } + timer.reset(); rc = build_3d_system( hpcg_state, in ); + double input_duration { timer.time() }; if( rc != SUCCESS ) { std::cerr << "Failure to generate the system (" << toString( rc ) << ")." << std::endl; out.error_code = rc; return; } + if( pid == 0 ) { + thcout << "input generation time (ms): " << input_duration << std::endl; + } + #ifdef HPCG_PRINT_SYSTEM - if( spmd<>::pid() == 0 ) { + if( pid == 0 ) { print_system( *hpcg_state ); } #endif @@ -231,7 +247,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { set( x, 0.0 ); #ifdef HPCG_PRINT_SYSTEM - if( spmd<>::pid() == 0 ) { + if( pid == 0 ) { print_vector( x, 50, "X" ); print_vector( b, 50, "B" ); } @@ -242,41 +258,77 @@ void grbProgram( const simulation_input & in, struct output & out ) { const bool with_preconditioning = ! in.no_preconditioning; if( in.evaluation_run ) { out.test_repetitions = 0; + if( pid == 0 ) { + thcout << "beginning evaluation run..." 
<< std::endl; + } timer.reset(); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); + rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, + in.max_iterations, 0.0, out.performed_iterations, out.residual, false ); double single_time = timer.time(); if( rc == SUCCESS ) { rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); } + if( rc != SUCCESS ) { + thcerr << "error during evaluation run" << std::endl; + out.error_code = rc; + return; + } out.times.useful = single_time; out.test_repetitions = static_cast< size_t >( 1000.0 / single_time ) + 1; - } else { - // do benchmark + + if( pid == 0 ) { + thcout << "Evaluation run" << std::endl; + } + + std::cout << " iterations: " << out.performed_iterations << std::endl + << " computed residual: " << out.residual << std::endl + << " time taken (ms): " << out.times.useful << std::endl + << " deduced inner repetitions for 1s duration: " << out.test_repetitions << std::endl; + return; + } + + // do a cold run to warm the system up + if( pid == 0 ) { + thcout << "beginning cold run..." 
<< std::endl; + } + timer.reset(); + rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, + 1, 0.0, out.performed_iterations, out.residual, false ); + double iter_duration { timer.time() }; + if( pid == 0 ) { + thcout << "cold run duration (ms): " << iter_duration << std::endl; + } + + + // do benchmark + for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { + rc = set( x, 0.0 ); + assert( rc == SUCCESS ); + if( pid == 0 ) { + thcout << "beginning iteration: " << i << std::endl; + } timer.reset(); - for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { - rc = set( x, 0.0 ); - assert( rc == SUCCESS ); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, in.max_iterations, 0.0, out.performed_iterations, out.residual ); - out.test_repetitions++; - if( rc != SUCCESS ) { - break; - } + rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, + in.max_iterations, 0.0, out.performed_iterations, out.residual, in.print_iter_stats ); + iter_duration = timer.time(); + out.times.useful += iter_duration; + if( pid == 0 ) { + thcout << "repetition,duration (ms): " << i << "," << iter_duration << std::endl; + } + out.test_repetitions++; + if( rc != SUCCESS ) { + break; } - double time_taken { timer.time() }; - out.times.useful = time_taken / static_cast< double >( out.test_repetitions ); - // sleep( 1 ); } + out.times.useful /= static_cast< double >( in.test_repetitions ); if( spmd<>::pid() == 0 ) { if( rc == SUCCESS ) { - if( in.evaluation_run ) { - std::cout << "Info: cold HPCG completed within " << out.performed_iterations << " iterations. Last computed residual is " << out.residual << ". Time taken was " << out.times.useful - << " ms. Deduced inner repetitions parameter of " << out.test_repetitions << " to take 1 second or more per inner benchmark." 
<< std::endl; - } else { - std::cout << "Average time taken for each of " << out.test_repetitions << " HPCG calls (hot start): " << out.times.useful << std::endl; - } + thcout << "repetitions, average time (ms): " << out.test_repetitions + << ", " << out.times.useful << std::endl; } else { - std::cerr << "Failure: call to HPCG did not succeed (" << toString( rc ) << ")." << std::endl; + thcerr << "Failure: call to HPCG did not succeed (" << toString( rc ) + << ")." << std::endl; } } @@ -285,7 +337,8 @@ void grbProgram( const simulation_input & in, struct output & out ) { // set error code out.error_code = rc; - Semiring< grb::operators::add< double >, grb::operators::mul< double >, grb::identities::zero, grb::identities::one > ring; + Semiring< grb::operators::add< double >, grb::operators::mul< double >, + grb::identities::zero, grb::identities::one > ring; grb::set( b, 1.0 ); out.square_norm_diff = 0.0; grb::eWiseMul( b, -1.0, x, ring ); @@ -303,11 +356,22 @@ void grbProgram( const simulation_input & in, struct output & out ) { */ static void parse_arguments( simulation_input &, size_t &, double &, int, char ** ); +#ifdef TEST_ITER +static void test_iters(); +static void test_iters2(); +#endif + int main( int argc, char ** argv ) { simulation_input sim_in; size_t test_outer_iterations; double max_residual_norm; +#ifdef TEST_ITER + test_iters(); + test_iters2(); + return 0; +#endif + parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; @@ -317,6 +381,7 @@ int main( int argc, char ** argv ) { thcout << "Max iterations: " << sim_in.max_iterations << std::endl; thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; + thcout << "Print iteration residual: " << std::boolalpha 
<< sim_in.print_iter_stats << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; thcout << "Maximum norm for residual: " << max_residual_norm << std::endl; @@ -374,7 +439,7 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) .add_optional_argument( "--nz", sim_in.nz, PHYS_SYSTEM_SIZE_DEF, "physical system size along z" ) - .add_optional_argument( "--max_coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, + .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, "maximum level for coarsening; 0 means no coarsening; note: actual " "level may be limited" " by the minimum system dimension" ) @@ -388,7 +453,9 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration .add_option( "--evaluation-run", sim_in.evaluation_run, false, "launch single run directly, without benchmarker (ignore " "repetitions)" ) - .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ); + .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) + .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, "on each iteration, print more statistics" ); + parser.parse( argc, argv ); @@ -422,3 +489,186 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration } } + + + +struct NZ { + size_t i; + size_t j; + double v; + + NZ( size_t _i, size_t _j, double _v ): i(_i), j(_j), v(_v) {} + + bool operator!=( const NZ& o ) const { + return i != o.i || j != o.j || v != o.v; + } +}; + +#ifdef 
TEST_ITER +static void test_iters() { + + using clock = std::chrono::steady_clock; + + constexpr size_t DIMS = 3; + + std::array< unsigned, DIMS > finer_sizes{ 1024, 1024, 1024}; + std::array< unsigned, DIMS > coarser_sizes; + for( size_t i = 0; i < finer_sizes.size(); i++ ) { + coarser_sizes[ i ] = finer_sizes[ i ] / 2; + } + + size_t rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; + + std::array< size_t, DIMS > lfiner_sizes{ 1024, 1024, 1024}; + std::array< size_t, DIMS > lcoarser_sizes{}; + for( size_t i = 0; i < lfiner_sizes.size(); i++ ) { + lcoarser_sizes[ i ] = lfiner_sizes[ i ] / 2; + } + grb::algorithms::old::coarsener_generator_iterator< DIMS, double > sbegin( lcoarser_sizes, lfiner_sizes, 0 ); + grb::algorithms::old::coarsener_generator_iterator< DIMS, double > send( lcoarser_sizes, lfiner_sizes, rows ); + + + using citer = hpcg_coarsener_builder< DIMS, unsigned, double >::hpcg_coarsener_iterator; + hpcg_coarsener_builder< DIMS, unsigned, double > coarsener( coarser_sizes, finer_sizes ); + citer pbegin( coarsener.make_begin_iterator() ); + const citer pend( coarsener.make_end_iterator() ); + + size_t num_elements = pend - pbegin; + std::cout << "number of elements: " << num_elements << std::endl; + + std::vector< NZ > svalues; + svalues.reserve( num_elements); + typename clock::time_point start( clock::now() ); + for( ; sbegin != send; ++sbegin ) { + // printf( "inserting %lu %lu\n", sbegin.i(), sbegin.j() ); + svalues.emplace_back( sbegin.i(), sbegin.j(), sbegin.v() ); + } + typename clock::time_point finish( clock::now() ); + std::cout << "sequential generation time (ms): " << + std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; + + + + + const size_t nthreads = omp_get_max_threads(); + size_t per_thread_num = ( num_elements + nthreads - 1 ) / nthreads; + std::vector< std::vector< NZ > > tvalues( nthreads ); + for( size_t i = 0; i < nthreads; i++ ) { + 
tvalues[i].reserve( per_thread_num ); + } + start = clock::now(); + #pragma omp parallel + { + + int t = omp_get_thread_num(); + std::vector< NZ > &tv = tvalues[ t ]; + // printf( "thread %d, size %lu\n", t, tv.size() ); + #pragma omp for schedule( static ) + for( auto it = pbegin; it != pend; ++it ) { + tv.emplace_back( it.i(), it.j(), it.v() ); + // printf( "thread %d: inserting %lu %lu\n", t, it.i(), it.j() ); + } + } + finish = clock::now(); + std::cout << "parallel generation time (ms): " << + std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; + + std::vector< NZ > pvalues; + for( const std::vector< NZ > &tv: tvalues ) { + pvalues.insert( pvalues.end(), tv.cbegin(), tv.cend() ); + } + + + if( svalues.size() != pvalues.size() ) { + std::cout << "different sizes!" << std::endl; + std::exit(-1); + } + + for( size_t i = 0; i < svalues.size(); i++ ) { + if( svalues[i] != pvalues[i] ) { + std::cout << "error at position " << i << std::endl; + } + } + std::cout << "all OK" << std::endl; +} + +static void test_iters2() { + + using clock = std::chrono::steady_clock; + + constexpr size_t DIMS = 3, halo_size = 1; + constexpr double diag_value = 26.0, non_diag_value = -1.0; + + std::array< unsigned, DIMS > sys_sizes{ 64, 64, 64}; + size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; + + std::array< size_t, DIMS > large_sys_sizes{ 64, 64, 64}; + old::matrix_generator_iterator< DIMS, double > sbegin( large_sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); + old::matrix_generator_iterator< DIMS, double > send( large_sys_sizes, n, halo_size, diag_value, non_diag_value ); + + hpcg_builder< DIMS, unsigned, double > hpcg_system( sys_sizes, halo_size ); + matrix_generator_iterator< DIMS, unsigned, double > pbegin( + hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); + matrix_generator_iterator< DIMS, unsigned, double > pend( + hpcg_system.make_end_iterator( diag_value, 
non_diag_value ) + ); + + size_t num_elements = pend - pbegin; + std::cout << "number of elements: " << num_elements << std::endl; + + std::vector< NZ > svalues; + svalues.reserve( num_elements); + typename clock::time_point start( clock::now() ); + for( ; sbegin != send; ++sbegin ) { + svalues.emplace_back( sbegin.i(), sbegin.j(), sbegin.v() ); + } + typename clock::time_point finish( clock::now() ); + std::cout << "sequential generation time (ms): " << + std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; + + + + + const size_t nthreads = omp_get_max_threads(); + size_t per_thread_num = ( num_elements + nthreads - 1 ) / nthreads; + std::vector< std::vector< NZ > > tvalues( nthreads ); + for( size_t i = 0; i < nthreads; i++ ) { + tvalues[i].reserve( per_thread_num ); + } + start = clock::now(); + #pragma omp parallel + { + + int t = omp_get_thread_num(); + std::vector< NZ > &tv = tvalues[ t ]; + // printf( "thread %d, size %lu\n", t, tv.size() ); + #pragma omp for schedule( static ) + for( auto it = pbegin; it != pend; ++it ) { + tv.emplace_back( it.i(), it.j(), it.v() ); + // printf( "thread %d: inserting %lu %lu\n", t, it.i(), it.j() ); + } + } + finish = clock::now(); + std::cout << "parallel generation time (ms): " << + std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; + + std::vector< NZ > pvalues; + for( const std::vector< NZ > &tv: tvalues ) { + pvalues.insert( pvalues.end(), tv.cbegin(), tv.cend() ); + } + + + if( svalues.size() != pvalues.size() ) { + std::cout << "different sizes!" 
<< std::endl; + std::exit(-1); + } + + for( size_t i = 0; i < svalues.size(); i++ ) { + if( svalues[i] != pvalues[i] ) { + std::cout << "error at position " << i << std::endl; + } + } + + std::cout << "all OK" << std::endl; +} +#endif // TEST_ITER From 01ecebdc64e7af5690928488b3d257d5a3861572 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 4 Jul 2022 15:05:56 +0200 Subject: [PATCH 04/28] building masks via iterators --- .../algorithms/hpcg/matrix_building_utils.hpp | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp index 2dfeabc49..4791d9d2d 100644 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp @@ -210,6 +210,104 @@ namespace grb { return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } + template< typename T > + struct color_mask_iter { + + using self_t = color_mask_iter< T >; + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using pointer = const value_type *; + using reference = value_type; + using difference_type = long; + + color_mask_iter() = delete; + + color_mask_iter( T _num_cols, T _pos ) noexcept: + color_num( _num_cols), + position( _pos ) {} + + + color_mask_iter( const self_t &o ): + color_num( o.color_num ), + position( o.position ) {} + + //self_t & operator=( const self_t & ) = default; + + bool operator!=( const self_t &o ) const { + return position != o.position; + } + + self_t & operator++() noexcept { + position += color_num; + return *this; + } + + self_t & operator++( int ) noexcept { + return operator++(); + } + + self_t & operator+=( size_t offset ) noexcept { + position += offset * color_num; + return *this; + } + + difference_type operator-( const self_t &o ) const noexcept { + return static_cast< difference_type >( ( position - o.position ) / color_num ); 
+ } + + pointer operator->() const { + return &position; + } + + reference operator*() const { + // std::cout << "returning " << position << std::endl; + return position; + } + + static self_t build_end_iterator( T vsize, T _num_cols, T _col ) { + T final_pos = ( ( vsize - _col + _num_cols - 1 ) / _num_cols ) * _num_cols + _col; + return self_t( _num_cols, final_pos ); + } + + private: + const T color_num; + T position; + }; + + struct true_iter { + + static const bool TRUE = true; + + using self_t = true_iter; + using iterator_category = std::random_access_iterator_tag; + using value_type = bool; + using pointer = const bool *; + using reference = bool; + using difference_type = long; + + true_iter() = default; + + bool operator!=( const self_t & ) const { + return true; + } + + self_t & operator++() noexcept { + return *this; + } + + self_t & operator++( int ) noexcept { + return operator++(); + } + + pointer operator->() const { + return &TRUE; + } + + reference operator*() const { + return true; + } + }; + /** * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . * @@ -246,12 +344,18 @@ namespace grb { grb::Vector< bool > & mask = masks.back(); // grb::set(mask, false); // DO NOT initialize false's explicitly, otherwise // RBGS will touch them too and the runtime will increase! 
+ /* for( std::size_t j = i; j < matrix_size; j += colors ) { rc = grb::setElement( mask, true, j ); assert( rc == grb::SUCCESS ); if( rc != grb::SUCCESS ) return rc; } + */ + color_mask_iter< unsigned > begin( colors, i ); + color_mask_iter< unsigned > end = + color_mask_iter< unsigned >::build_end_iterator( matrix_size, colors, i ); + grb::buildVectorUnique( mask, begin, end, true_iter(), true_iter(), IOMode::SEQUENTIAL ); } return rc; } From 1b0a7402954415ec7957f847a44d156d7d5583bb Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 18 Nov 2022 14:01:22 +0100 Subject: [PATCH 05/28] factoring out logic to build base system, better handling error values during input creation and using N-dimensional system generator in multiple places --- .../graphblas/algorithms/hpcg/hpcg_data.hpp | 44 ++++- .../algorithms/hpcg/matrix_building_utils.hpp | 120 ++++++------- .../algorithms/hpcg/ndim_matrix_builders.hpp | 10 +- .../hpcg/old_ndim_matrix_builders.hpp | 14 ++ .../algorithms/hpcg/system_building_utils.hpp | 164 ++++++++++++------ 5 files changed, 224 insertions(+), 128 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/hpcg_data.hpp b/include/graphblas/algorithms/hpcg/hpcg_data.hpp index 96b39856d..c53ef99e4 100644 --- a/include/graphblas/algorithms/hpcg/hpcg_data.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg_data.hpp @@ -43,7 +43,10 @@ namespace grb { * @tparam IOType type of values of the vectors for intermediate results * @tparam NonzeroType type of the values stored inside the system matrix #A */ - template< typename IOType, typename NonzeroType > + template< + typename IOType, + typename NonzeroType + > struct system_data { const std::size_t system_size; ///< size of the system, i.e. side of the #A @@ -65,14 +68,24 @@ namespace grb { * of rows and columns of the #A matrix. 
*/ system_data( std::size_t sys_size ) : - system_size( sys_size ), A( sys_size, sys_size ), A_diagonal( sys_size ), z( sys_size ), r( sys_size ), - // temp(sys_size), - smoother_temp( sys_size ) {} + system_size( sys_size ), + A( sys_size, sys_size ), + A_diagonal( sys_size ), + z( sys_size ), + r( sys_size ), + smoother_temp( sys_size ) { } // for safety, disable copy semantics system_data( const system_data & o ) = delete; system_data & operator=( const system_data & ) = delete; + + grb::RC zero_temp_vectors() { + grb::RC rc = grb::set( z, 0 ); + rc = rc ? rc : grb::set( r, 0 ); + rc = rc ? rc : grb::set( smoother_temp, 0 ); + return rc; + } }; /** @@ -104,7 +117,10 @@ namespace grb { * As for \ref system_data, internal vectors and matrices are initialized to the proper size, * but their values are \b not initialized. */ - template< typename IOType, typename NonzeroType > + template< + typename IOType, + typename NonzeroType + > struct multi_grid_data : public system_data< IOType, NonzeroType > { const std::size_t finer_size; ///< ssize of the finer system to coarse from; @@ -125,7 +141,10 @@ namespace grb { * @param[in] _finer_size size of the finer system, i.e. size of external objects \b before coarsening */ multi_grid_data( std::size_t coarser_size, std::size_t _finer_size ) : - system_data< IOType, NonzeroType >( coarser_size ), finer_size( _finer_size ), Ax_finer( finer_size ), coarsening_matrix( coarser_size, finer_size ) { + system_data< IOType, NonzeroType >( coarser_size ), + finer_size( _finer_size ), + Ax_finer( finer_size ), + coarsening_matrix( coarser_size, finer_size ) { coarser_level = nullptr; } @@ -137,6 +156,12 @@ namespace grb { delete coarser_level; } } + + grb::RC zero_temp_vectors() { + grb::RC rc = this->system_data< IOType, NonzeroType >::zero_temp_vectors(); + rc = rc ? 
rc : grb::set( Ax_finer, 0 ); + return rc; + } }; /** @@ -185,6 +210,13 @@ namespace grb { delete coarser_level; } } + + grb::RC zero_temp_vectors() { + grb::RC rc = this->system_data< IOType, NonzeroType >::zero_temp_vectors(); + rc = rc ? rc : grb::set( u, 0 ); + rc = rc ? rc : grb::set( p, 0 ); + return rc; + } }; } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp index 4791d9d2d..2ccd18fa8 100644 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp @@ -38,15 +38,6 @@ #include "ndim_matrix_builders.hpp" -#define PAR - - - -#ifndef PAR -#include -#endif - - namespace grb { namespace algorithms { @@ -76,20 +67,6 @@ namespace grb { begin += first; } -#ifndef PAR - template< typename T > void partition_rows( - T rows, - T& first_row, - T& last_row - ) { - const size_t num_procs{ spmd<>::nprocs() }; - const T per_process{ ( rows + num_procs - 1 ) / num_procs }; // round up - first_row = std::min( per_process * static_cast< T >( spmd<>::pid() ), rows ); - last_row = std::min( first_row + per_process, rows ); - } -#endif - - /** * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. 
* @@ -107,24 +84,25 @@ namespace grb { * @param non_diag_value value outside of the diagonal * @return grb::RC the success value returned when trying to build the matrix */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_system_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & sys_sizes, std::size_t halo_size, T diag_value, T non_diag_value ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; - if( grb::nrows( M ) != n || grb::nrows( M ) != grb::ncols( M ) ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be square" - " and in accordance with given system " - "sizes" ); - } -#ifdef PAR - using coord_t = unsigned; - if( n > std::numeric_limits< coord_t >::max() ) { + template< + std::size_t DIMS, + typename coord_t, + typename T, + enum grb::Backend B + > grb::RC build_ndims_system_matrix( + grb::Matrix< T, B > & M, + const grb::algorithms::hpcg_builder< DIMS, coord_t, T > & hpcg_system, + T diag_value, + T non_diag_value + ) { + if( hpcg_system.system_size() > std::numeric_limits< coord_t >::max() ) { throw std::domain_error( "CoordT cannot store the matrix coordinates" ); } + /* std::array< coord_t, DIMS > _sys_sizes; for( size_t i = 0; i < DIMS; i++ ) _sys_sizes[i] = sys_sizes[i]; grb::algorithms::hpcg_builder< DIMS, coord_t, T > hpcg_system( _sys_sizes, halo_size ); + */ grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > begin( hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > end( @@ -133,12 +111,6 @@ namespace grb { partition_iteration_range( hpcg_system.system_size(), begin, end ); // std::cout << "num nonzeroes " << ( end - begin ) << std::endl; -#else - size_t first_row, last_row; - partition_rows( n, first_row, last_row ); - grb::algorithms::old::matrix_generator_iterator< 
DIMS, T > begin( sys_sizes, first_row, halo_size, diag_value, non_diag_value ); - grb::algorithms::old::matrix_generator_iterator< DIMS, T > end( sys_sizes, last_row, halo_size, diag_value, non_diag_value ); -#endif return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } @@ -164,8 +136,15 @@ namespace grb { * in \p coarser_size , otherwise an exception is thrown * @return grb::RC the success value returned when trying to build the matrix */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_coarsener_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & coarser_sizes, const std::array< std::size_t, DIMS > & finer_sizes ) { + template< + std::size_t DIMS, + typename T, + enum grb::Backend B + > grb::RC build_ndims_coarsener_matrix( + grb::Matrix< T, B > & M, + const std::array< std::size_t, DIMS > & coarser_sizes, + const std::array< std::size_t, DIMS > & finer_sizes + ) { static_assert( DIMS > 0, "DIMS must be > 0" ); size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { @@ -182,7 +161,6 @@ namespace grb { " with rows == " "and cols == " ); } -#ifdef PAR using coord_t = unsigned; if( rows > std::numeric_limits< coord_t >::max() ) { throw std::domain_error( "CoordT cannot store the row coordinates" ); @@ -201,12 +179,6 @@ namespace grb { coarsener.make_end_iterator() ); partition_iteration_range( coarsener.system_size(), begin, end ); -#else - size_t first_row, last_row; - partition_rows( rows, first_row, last_row ); - grb::algorithms::old::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, first_row ); - grb::algorithms::old::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, last_row ); -#endif return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } @@ -274,40 +246,58 @@ namespace grb { T position; }; + template< 
typename CoordT > struct true_iter { - static const bool TRUE = true; + static const bool __TRUE = true; - using self_t = true_iter; + using self_t = true_iter< CoordT >; using iterator_category = std::random_access_iterator_tag; using value_type = bool; using pointer = const bool *; - using reference = bool; + using reference = const bool&; using difference_type = long; - true_iter() = default; + true_iter() = delete; + + true_iter( CoordT first ): index( first ) {} + + true_iter( const self_t & ) = default; - bool operator!=( const self_t & ) const { - return true; + self_t & operator=( const self_t & ) = default; + + bool operator!=( const self_t & other ) const { + return this->index != other.index; } self_t & operator++() noexcept { + (void) index++; return *this; } - self_t & operator++( int ) noexcept { - return operator++(); + self_t & operator+=( size_t increment ) noexcept { + index += increment; + return *this; + } + + difference_type operator-( const self_t & other ) noexcept { + return static_cast< difference_type >( this->index - other.index ); } pointer operator->() const { - return &TRUE; + return &__TRUE; } reference operator*() const { - return true; + return *(this->operator->()); } + + private: + CoordT index; }; + template< typename CoordT > const bool true_iter< CoordT >::__TRUE; + /** * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . * @@ -327,7 +317,11 @@ namespace grb { * @return grb::RC the success value returned when trying to build the vector */ template< enum grb::Backend B > - grb::RC build_static_color_masks( std::vector< grb::Vector< bool, B > > & masks, std::size_t matrix_size, std::size_t colors ) { + grb::RC build_static_color_masks( + std::vector< grb::Vector< bool, B > > & masks, + std::size_t matrix_size, + std::size_t colors + ) { if( ! 
masks.empty() ) { throw std::invalid_argument( "vector of masks is expected to be " "empty" ); @@ -355,7 +349,7 @@ namespace grb { color_mask_iter< unsigned > begin( colors, i ); color_mask_iter< unsigned > end = color_mask_iter< unsigned >::build_end_iterator( matrix_size, colors, i ); - grb::buildVectorUnique( mask, begin, end, true_iter(), true_iter(), IOMode::SEQUENTIAL ); + grb::buildVectorUnique( mask, begin, end, true_iter< size_t >( 0 ), true_iter< size_t >( matrix_size ), IOMode::SEQUENTIAL ); } return rc; } diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp index 06672d110..db94c8a29 100644 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp @@ -291,22 +291,26 @@ namespace grb { hpcg_builder< DIMS, CoordT, T> & operator=( hpcg_builder< DIMS, CoordT, T> && ) = delete; size_t system_size() const { + return system.base_system_size(); + } + + size_t num_neighbors() const { return system.halo_system_size(); } hpcg_sys_iterator make_begin_iterator( T diag, T non_diag - ) { + ) const { return hpcg_sys_iterator( system, diag, non_diag ); } hpcg_sys_iterator make_end_iterator( T diag, T non_diag - ) { + ) const { hpcg_sys_iterator result( system, diag, non_diag ); - result += system_size() - 1; // do not trigger boundary checks + result += num_neighbors() - 1; // do not trigger boundary checks ++result; return result; } diff --git a/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp index 256995b02..9f64e9884 100644 --- a/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp @@ -202,6 +202,13 @@ namespace grb { using array_t = typename row_generator< DIMS >::array_t; using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T 
>; + using RowIndexType = typename row_generator< DIMS >::row_coordinate_type; + using ColumnIndexType = typename row_generator< DIMS >::row_coordinate_type; + using iterator_category = std::forward_iterator_tag; + using pointer = const value_type; + using reference = const value_type&; + using difference_type = long; + // halo may in future become a DIM-size array to iterate in arbitrary shapes const row_coordinate_type halo; ///< number of points per dimension to iterate around const nonzero_value_type diagonal_value; ///< value to be emitted when the object has moved to the diagonal @@ -418,6 +425,13 @@ namespace grb { using array_t = typename row_generator< DIMS >::array_t; using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T >; + using RowIndexType = typename row_generator< DIMS >::row_coordinate_type; + using ColumnIndexType = typename row_generator< DIMS >::row_coordinate_type; + using iterator_category = std::forward_iterator_tag; + using pointer = const value_type; + using reference = const value_type&; + using difference_type = long; + // the sizes to project from const array_t finer_sizes; ///< the size of the finer system (columns) array_t steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 959d21969..78759a539 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -66,16 +67,87 @@ namespace grb { */ template< std::size_t DIMS, typename T > struct hpcg_system_params { - const std::array< std::size_t, DIMS > & physical_sys_sizes; - const std::size_t halo_size; - const std::size_t num_colors; - const T diag_value; - const T non_diag_value; - const std::size_t min_phys_size; - const std::size_t max_levels; - const std::size_t coarsening_step; + std::array< std::size_t, DIMS > physical_sys_sizes; + std::size_t halo_size; + std::size_t num_colors; + T diag_value; + T non_diag_value; + std::size_t min_phys_size; + std::size_t max_levels; + std::size_t coarsening_step; }; + // SystemData must have a zero_temp_vectors() + template< std::size_t DIMS, typename IOType, typename NonzeroType, typename SystemData > + grb::RC build_base_system( + typename std::enable_if< + std::is_base_of< system_data< IOType, NonzeroType >, SystemData >::value, + SystemData& >::type system, + size_t system_size, + const std::array< std::size_t, DIMS > & physical_sys_sizes, + size_t halo_size, + NonzeroType diag_value, + NonzeroType non_diag_value, + size_t num_colors, + std::array< double, 3 > & times + ) { + + grb::RC rc { grb::SUCCESS }; + const size_t pid { spmd<>::pid() }; + grb::utils::Timer timer; + static const char * const log_prefix = " -- "; + + using coord_t = unsigned; + static_assert( DIMS > 0, "DIMS must be > 0" ); + size_t n { std::accumulate( physical_sys_sizes.cbegin(), physical_sys_sizes.cend(), + 1UL, std::multiplies< size_t >() ) }; + if( n > std::numeric_limits< coord_t >::max() ) { + throw std::domain_error( 
"CoordT cannot store the matrix coordinates" ); + } + std::array< coord_t, DIMS > sys_sizes; + for( size_t i = 0; i < DIMS; i++ ) sys_sizes[i] = physical_sys_sizes[i]; + grb::algorithms::hpcg_builder< DIMS, coord_t, NonzeroType > system_generator( sys_sizes, halo_size ); + + MASTER_PRINT( pid, log_prefix << "generating system matrix..." ); + timer.reset(); + rc = build_ndims_system_matrix< DIMS, coord_t, NonzeroType >( + system.A, + system_generator, + diag_value, non_diag_value + ); + if( rc != grb::SUCCESS ) { + return rc; + } + times[ 0 ] = timer.time(); + MASTER_PRINT( pid, " time (ms) " << times[ 0 ] << std::endl ); + + // set values of vectors + MASTER_PRINT( pid, log_prefix << "populating vectors..." ); + timer.reset(); + rc = set( system.A_diagonal, diag_value ); + if( rc != grb::SUCCESS ) { + return rc; + } + rc = system.zero_temp_vectors(); + if( rc != grb::SUCCESS ) { + return rc; + } + times[ 1 ] = timer.time(); + MASTER_PRINT( pid, " time (ms) " << times[ 1 ] << std::endl ); + + + MASTER_PRINT( pid, log_prefix << "generating color masks..." ); + timer.reset(); + rc = build_static_color_masks( system.color_masks, system_size, num_colors ); + if( rc != grb::SUCCESS ) { + return rc; + } + times[ 2 ] = timer.time(); + MASTER_PRINT( pid, " time (ms) " << times[ 2 ] << std::endl ); + + return rc; + } + /** * @brief Generates an entire HPCG problem according to the parameters in \p params , storing it in \p holder . 
* @@ -87,9 +159,13 @@ namespace grb { * otherwise the first unsuccessful return value */ template< std::size_t DIMS, typename T = double > - grb::RC build_hpcg_system( std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > > & holder, hpcg_system_params< DIMS, T > & params ) { + grb::RC build_hpcg_system( + std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > > & holder, + const hpcg_system_params< DIMS, T > & params + ) { // n is the system matrix size - const std::size_t n { std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; + const std::size_t n { std::accumulate( params.physical_sys_sizes.cbegin(), + params.physical_sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; grb::algorithms::hpcg_data< T, T, T > * data { new grb::algorithms::hpcg_data< T, T, T >( n ) }; @@ -100,30 +176,21 @@ namespace grb { grb::RC rc { grb::SUCCESS }; const size_t pid { spmd<>::pid() }; grb::utils::Timer timer; - MASTER_PRINT( pid, "\n-- generating system matrix...\n" << std::endl ); - grb::spmd<>::barrier(); - timer.reset(); - rc = build_ndims_system_matrix< DIMS, T >( data->A, params.physical_sys_sizes, params.halo_size, params.diag_value, params.non_diag_value ); - MASTER_PRINT( pid, "\n-- generating system matrix... time (ms) " << timer.time() << std::endl ); + std::array< double, 3 > times; + MASTER_PRINT( pid, "\n-- main system" << std::endl ); + rc = build_base_system< DIMS, T, T, grb::algorithms::hpcg_data< T, T, T > >( *data, n, params.physical_sys_sizes, params.halo_size, + params.diag_value, params.non_diag_value, params.num_colors, times ); if( rc != grb::SUCCESS ) { - MASTER_PRINT( pid, "Failure to generate the initial system (" - << toString( rc ) << ") of size " << n << "\n" ); + MASTER_PRINT( pid, " error: " << toString( rc ) ); return rc; } - - // set values of vectors - MASTER_PRINT( pid, "-- populating vectors..." 
); - timer.reset(); - set( data->A_diagonal, params.diag_value ); - data->zero_temp_vectors(); - MASTER_PRINT( pid, " time (ms) " << timer.time() << std::endl ); - - - MASTER_PRINT( pid, "-- generating color masks...\n" << std::endl ); - timer.reset(); - build_static_color_masks( data->color_masks, n, params.num_colors ); - MASTER_PRINT( pid, "\n\n-- generating color masks... time (ms) " << timer.time() << std::endl ); + MASTER_PRINT( pid, "-- main system generation time (ms) " + "[system matrix,vectors,color masks]:" + << times[ 0 ] + << "," << times[ 1 ] + << "," << times[ 2 ] << std::endl; + ); // initialize coarsening with additional pointers and dimensions copies to iterate and divide grb::algorithms::multi_grid_data< T, T > ** coarser = &data->coarser_level; @@ -142,47 +209,32 @@ namespace grb { std::size_t coarser_size { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; std::size_t previous_size { std::accumulate( previous_sizes.cbegin(), previous_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; // build data structures for new level - grb::algorithms::multi_grid_data< double, double > * new_coarser { new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ) }; + grb::algorithms::multi_grid_data< T, T > * new_coarser { new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ) }; // install coarser level immediately to cleanup in case of build error *coarser = new_coarser; - MASTER_PRINT( pid, "-- level " << coarsening_level << "\n\tgenerating coarsening matrix...\n" ); + MASTER_PRINT( pid, "-- level " << coarsening_level << "\n -- generating coarsening matrix...\n" ); timer.reset(); // initialize coarsener matrix, system matrix and diagonal vector for the coarser level rc = build_ndims_coarsener_matrix< DIMS >( new_coarser->coarsening_matrix, coarser_sizes, previous_sizes ); if( rc != grb::SUCCESS ) { - MASTER_PRINT( pid, "Failure to 
generate coarsening matrix (" << toString( rc ) << ").\n" ); + MASTER_PRINT( pid, " error: " << toString( rc ) ); return rc; } double coarsener_gen_time{ timer.time() }; - MASTER_PRINT( pid, "\tgenerating system matrix...\n" ); - timer.reset(); - rc = build_ndims_system_matrix< DIMS, T >( new_coarser->A, coarser_sizes, params.halo_size, params.diag_value, params.non_diag_value ); + rc = build_base_system< DIMS, T, T, grb::algorithms::multi_grid_data< T, T > >( *new_coarser, coarser_size, coarser_sizes, params.halo_size, + params.diag_value, params.non_diag_value, params.num_colors, times ); if( rc != grb::SUCCESS ) { - MASTER_PRINT( pid, "Failure to generate system matrix (" << toString( rc ) - << ") for size " << coarser_size << "\n" ); + MASTER_PRINT( pid, " error: " << toString( rc ) ); return rc; } - double coarse_sys_gen_time{ timer.time() }; - - MASTER_PRINT( pid, "\tpopulating vectors...\n" ); - timer.reset(); - set( new_coarser->A_diagonal, params.diag_value ); - new_coarser->zero_temp_vectors(); - double coarser_vec_gen_time{ timer.time() }; - - // build color masks for coarser level (same masks, but with coarser system size) - MASTER_PRINT( pid, "\tgenerating color masks..." << std::endl ); - timer.reset(); - rc = build_static_color_masks( new_coarser->color_masks, coarser_size, params.num_colors ); - double coarse_masks_sys_time{ timer.time() }; - MASTER_PRINT( pid, "-- level " << coarsening_level << "... 
time (ms) for " - "[coarsening matrix,coarse system matrix,coarser vectors,color masks]:" + MASTER_PRINT( pid, "-- level generation time (ms) " + "[level,coarsening matrix,system matrix,vectors,color masks]:" << coarsening_level << "," << coarsener_gen_time - << "," << coarse_sys_gen_time - << "," << coarser_vec_gen_time - << "," << coarse_masks_sys_time << std::endl; + << "," << times[ 0 ] + << "," << times[ 1 ] + << "," << times[ 2 ] << std::endl; ); // prepare for new iteration From 7525c1811b530711d592a7805d9501641d3d94c2 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 18 Nov 2022 11:50:48 +0100 Subject: [PATCH 06/28] adding greedy coloring algorithm for HPCG smoother and generating color masks according to the result of the coloring algorithm and adding test for coloring algorithm --- .../graphblas/algorithms/hpcg/coloring.hpp | 165 ++++++++++++++++++ .../algorithms/hpcg/matrix_building_utils.hpp | 132 +++++--------- .../algorithms/hpcg/ndim_matrix_builders.hpp | 12 ++ .../algorithms/hpcg/system_building_utils.hpp | 58 ++++-- tests/smoke/hpcg.cpp | 64 +++++-- 5 files changed, 307 insertions(+), 124 deletions(-) create mode 100644 include/graphblas/algorithms/hpcg/coloring.hpp diff --git a/include/graphblas/algorithms/hpcg/coloring.hpp b/include/graphblas/algorithms/hpcg/coloring.hpp new file mode 100644 index 000000000..1e6378c59 --- /dev/null +++ b/include/graphblas/algorithms/hpcg/coloring.hpp @@ -0,0 +1,165 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef _H_GRB_ALGORITHMS_HPCG_COLORING +#define _H_GRB_ALGORITHMS_HPCG_COLORING + +#include +#include + +#include + +namespace grb { + namespace algorithms { + + + /** + * Coloring algorithm for matrix generated by a \p DIMS - dimensional system. + * + * This function implements a < b>greedy heuristics< /b> to color the rows of a matrix generated by + * a \p DIMS - dimensional generator \p system, so that no two connected elements < em>(i,j)< /em> + * in the system (corresponding to a nonzero < em>(i,j)< /em> entry in the matrix) have the same color. + * If \p reorder_rows_per_color is false (as per default), the coloring information is stored into + * \p row_colors, while \p color_counters stores the number of rows for each color. + * + * If \p reorder_rows_per_color is true, the function performs the additional step of \b re-ordering + * the rows depending on their color: rows of color \a 0 are moved first, then rows of color \a 1 + * are moved to the following positions and so on. In this case, \p row_colors stores the new row number + * while \p color_counters stores at each position \a i the new position of the first row of color \a i. + * + * In both cases, \a color_counters.size() gives the number of found colors. 
+ * + * @tparam DIMS dimensions of the system + * @tparam CoordType type of the coordinates + * @param[in] system generator for an \p DIMS - dimesional system with halo + * @param[out] row_colors if \p reorder_rows_per_color is false, stores the color of each row; + * if \p reorder_rows_per_color is true, stores the new position of each row, so that rows + * of the same color are gourped together; the initial content of the vector is destroyed + * @param[out] color_counters if \p reorder_rows_per_color is false, stores the number of rows per color; + * if \p reorder_rows_per_color is true, stores at each position \a i the offset in \p color_counters + * where the (clustered) rows of color \a i start from; the initial content of the vector is destroyed + * @param[in] reorder_rows_per_color whether to do the clustering after the coloring + */ + template< + std::size_t DIMS, + typename CoordType + > void color_matrix_greedy( + const grb::utils::geometry::linearized_halo_ndim_system< CoordType, DIMS > &system, + std::vector< CoordType > &row_colors, + std::vector< CoordType > &color_counters, + bool reorder_rows_per_color = false + ) { + + // This function can be used to completely transform any part of the data structures. + // Right now it does nothing, so compiling with a check for unused variables results in complaints + + CoordType nrows = system.system_size(); + row_colors.insert( row_colors.begin(), nrows, nrows ); // value `nrow' means `uninitialized'; initialized colors go from 0 to nrow-1 + CoordType totalColors = 1; + row_colors[0] = 0; // first point gets color 0 + + // Finds colors in a greedy (a likely non-optimal) fashion. 
+ typename grb::utils::geometry::linearized_halo_ndim_system< CoordType, DIMS >::iterator begin = system.begin(); + begin.next_element(); // skip first row + + while( begin.has_more_elements() ) { + CoordType curRow = begin->get_element_linear(); + + if( row_colors[ curRow ] != nrows ) { + // if color already assigned to curRow + continue; + } + std::vector< bool > assigned( totalColors, false ); + CoordType currentlyAssigned = 0; + + while( begin.has_more_neighbours() ) { + CoordType curCol = begin->get_neighbor_linear(); + if( curCol < curRow ) { + assert( row_colors[ curCol ] < nrows ); // if curCol < curRow, curCol has already a color assigned + std::vector< bool >::reference color_is_assigned = assigned[ row_colors[ curCol ] ]; + if( !color_is_assigned ) { + // count how many colors are already assigned + (void) currentlyAssigned++; + } + // track which colors are assigned + color_is_assigned = true; + } // else // could take advantage of indices being sorted + begin.next_neighbour(); + } + + if( currentlyAssigned < totalColors ) { + // if there is at least one color left to use, look for it + for( CoordType j = 0; j < totalColors; ++j ) { + if( !assigned[ j ] ) { + // if no neighbor with this color, use it for this row + row_colors[ curRow ] = j; + break; + } + } + } else { + assert( row_colors[ curRow ] == nrows ); + if( row_colors[ curRow ] == nrows ) { + row_colors[ curRow ] = totalColors; + (void) totalColors++; + } else { + assert( 0 ); // should never get here + } + } + begin.next_element(); + } + +#ifdef _DEBUG + std::cout << "assigned colors: " << totalColors << " [ -> ]\n"; + for( size_t i = 0; i < row_colors.size(); i++ ){ + std::cout << i << " -> " << row_colors[ i ] << ", "; + } + std::cout << std::endl; +#endif + + // count number of vertices per color + color_counters.insert( color_counters.begin(), totalColors, 0 ); + for( CoordType i = 0; i < nrows; ++i ) { + (void) color_counters[ row_colors[ i ] ]++; + } + + if( !reorder_rows_per_color ) { + 
return; + } + + // form in-place prefix scan + CoordType old = 0, old0; + for( CoordType i = 1; i < totalColors; ++i ) { + old0 = color_counters[i]; + color_counters[i] = color_counters[i-1] + old; + old = old0; + } + color_counters[0] = 0; + + // translate `colors' into a permutation + for( CoordType i = 0; i < nrows; ++i ) { + row_colors[ i ] = color_counters[ row_colors[ i ] ]++; + } + } + + } // namespace algorithms +} // namespace grb + + + +#endif // _H_GRB_ALGORITHMS_HPCG_COLORING + diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp index 2ccd18fa8..45486e99b 100644 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include @@ -57,6 +59,9 @@ namespace grb { IterT &begin, IterT &end ) { + static_assert( std::is_base_of< std::random_access_iterator_tag, + typename std::iterator_traits< IterT >::iterator_category >::value, + "the given iterator is not a random access one" ); assert( num_nonzeroes == static_cast< size_t >( end - begin ) ); size_t first, last; partition_nonzeroes( num_nonzeroes, first, last ); @@ -108,9 +113,8 @@ namespace grb { grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > end( hpcg_system.make_end_iterator( diag_value, non_diag_value ) ); - partition_iteration_range( hpcg_system.system_size(), begin, end ); + partition_iteration_range( hpcg_system.num_neighbors(), begin, end ); - // std::cout << "num nonzeroes " << ( end - begin ) << std::endl; return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } @@ -182,70 +186,6 @@ namespace grb { return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } - template< typename T > - struct color_mask_iter { - - using self_t = color_mask_iter< T >; - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using 
pointer = const value_type *; - using reference = value_type; - using difference_type = long; - - color_mask_iter() = delete; - - color_mask_iter( T _num_cols, T _pos ) noexcept: - color_num( _num_cols), - position( _pos ) {} - - - color_mask_iter( const self_t &o ): - color_num( o.color_num ), - position( o.position ) {} - - //self_t & operator=( const self_t & ) = default; - - bool operator!=( const self_t &o ) const { - return position != o.position; - } - - self_t & operator++() noexcept { - position += color_num; - return *this; - } - - self_t & operator++( int ) noexcept { - return operator++(); - } - - self_t & operator+=( size_t offset ) noexcept { - position += offset * color_num; - return *this; - } - - difference_type operator-( const self_t &o ) const noexcept { - return static_cast< difference_type >( ( position - o.position ) / color_num ); - } - - pointer operator->() const { - return &position; - } - - reference operator*() const { - // std::cout << "returning " << position << std::endl; - return position; - } - - static self_t build_end_iterator( T vsize, T _num_cols, T _col ) { - T final_pos = ( ( vsize - _col + _num_cols - 1 ) / _num_cols ) * _num_cols + _col; - return self_t( _num_cols, final_pos ); - } - - private: - const T color_num; - T position; - }; - template< typename CoordT > struct true_iter { @@ -318,40 +258,50 @@ namespace grb { */ template< enum grb::Backend B > grb::RC build_static_color_masks( - std::vector< grb::Vector< bool, B > > & masks, std::size_t matrix_size, - std::size_t colors + const std::vector< std::vector< size_t > > &per_color_rows, + std::vector< grb::Vector< bool, B > > & masks ) { if( ! 
masks.empty() ) { - throw std::invalid_argument( "vector of masks is expected to be " - "empty" ); - } - if( matrix_size < colors ) { - throw std::invalid_argument( "syztem size is < number of colors: too " - "small" ); + throw std::invalid_argument( "vector of masks is expected to be empty" ); } - grb::RC rc { grb::SUCCESS }; - masks.reserve( colors ); - for( std::size_t i { 0U }; i < colors; i++ ) { - // build in-place, assuming the compiler deduces the right constructor according to B + for( size_t i = 0; i < per_color_rows.size(); i++ ) { + const std::vector< size_t > & rows = per_color_rows[ i ]; + /* + { + std::cout << "\ncolor " << i << std::endl; + for( size_t row : rows ) { + std::cout << row << " "; + } + std::cout << std::endl; + } + */ masks.emplace_back( matrix_size ); - grb::Vector< bool > & mask = masks.back(); - // grb::set(mask, false); // DO NOT initialize false's explicitly, otherwise - // RBGS will touch them too and the runtime will increase! + grb::Vector< bool > & output_mask = masks.back(); + std::vector< size_t >::const_iterator begin = rows.cbegin(); + std::vector< size_t >::const_iterator end = rows.cend(); + // partition_iteration_range( rows.size(), begin, end ); + grb::RC rc = grb::buildVectorUnique( output_mask, begin , end, true_iter< size_t >( 0 ), + true_iter< size_t >( std::distance( begin, end ) ), IOMode::SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "error while creating output mask for color " << i << ": " + << toString( rc ) << std::endl; + return rc; + } /* - for( std::size_t j = i; j < matrix_size; j += colors ) { - rc = grb::setElement( mask, true, j ); - assert( rc == grb::SUCCESS ); - if( rc != grb::SUCCESS ) - return rc; + { + std::cout << "mask color " << i << std::endl; + size_t count = 0; + for( const auto & v : output_mask ) { + std::cout << v.first << " "; + count++; + if( count > 20 ) break; + } + std::cout << std::endl; } */ - color_mask_iter< unsigned > begin( colors, i ); - color_mask_iter< unsigned > 
end = - color_mask_iter< unsigned >::build_end_iterator( matrix_size, colors, i ); - grb::buildVectorUnique( mask, begin, end, true_iter< size_t >( 0 ), true_iter< size_t >( matrix_size ), IOMode::SEQUENTIAL ); } - return rc; + return grb::SUCCESS; } } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp index db94c8a29..35a15238d 100644 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp @@ -215,6 +215,10 @@ namespace grb { return _val.v(); } + const __iter_t & it() const { + return this->_sys_iter; + } + private: value_type _val; const linear_system_t *_lin_system; @@ -298,6 +302,10 @@ namespace grb { return system.halo_system_size(); } + const system_t & get_generator() const { + return system; + } + hpcg_sys_iterator make_begin_iterator( T diag, T non_diag @@ -499,6 +507,10 @@ namespace grb { return _val.v(); } + const __iter_t & it() const { + return this->_sys_iter; + } + private: //// incremented when incrementing the row coordinates; is is the ration between //// #finer_sizes and row_generator#physical_sizes diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 78759a539..77bef1995 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -37,6 +37,8 @@ #include "hpcg_data.hpp" #include "matrix_building_utils.hpp" +#include "coloring.hpp" + #ifndef MASTER_PRINT #define INTERNAL_MASTER_PRINT #define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } @@ -69,7 +71,6 @@ namespace grb { struct hpcg_system_params { std::array< std::size_t, DIMS > physical_sys_sizes; std::size_t halo_size; - std::size_t num_colors; T diag_value; T non_diag_value; std::size_t min_phys_size; @@ -77,6 +78,17 @@ namespace grb { 
std::size_t coarsening_step; }; + template< typename CoordType > void split_rows_by_color( + const std::vector< CoordType > & row_colors, + size_t num_colors, + std::vector< std::vector< CoordType > > & per_color_rows + ) { + per_color_rows.resize( num_colors ); + for( CoordType i = 0; i < row_colors.size(); i++ ) { + per_color_rows[ row_colors[ i ] ].push_back( i ); + } + } + // SystemData must have a zero_temp_vectors() template< std::size_t DIMS, typename IOType, typename NonzeroType, typename SystemData > grb::RC build_base_system( @@ -88,8 +100,7 @@ namespace grb { size_t halo_size, NonzeroType diag_value, NonzeroType non_diag_value, - size_t num_colors, - std::array< double, 3 > & times + std::array< double, 4 > & times ) { grb::RC rc { grb::SUCCESS }; @@ -97,7 +108,7 @@ namespace grb { grb::utils::Timer timer; static const char * const log_prefix = " -- "; - using coord_t = unsigned; + using coord_t = size_t; static_assert( DIMS > 0, "DIMS must be > 0" ); size_t n { std::accumulate( physical_sys_sizes.cbegin(), physical_sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; @@ -135,15 +146,28 @@ namespace grb { times[ 1 ] = timer.time(); MASTER_PRINT( pid, " time (ms) " << times[ 1 ] << std::endl ); + MASTER_PRINT( pid, log_prefix << "running coloring heuristics..." ); + timer.reset(); + std::vector< coord_t > colors, color_counters; + color_matrix_greedy( system_generator.get_generator(), colors, color_counters ); + std::vector< std::vector< coord_t > > per_color_rows; + split_rows_by_color( colors, color_counters.size(), per_color_rows ); + if( rc != grb::SUCCESS ) { + return rc; + } + times[ 2 ] = timer.time(); + MASTER_PRINT( pid, " found " << color_counters.size() << " colors, time (ms) " + << times[ 2 ] << std::endl ); + MASTER_PRINT( pid, log_prefix << "generating color masks..." 
); timer.reset(); - rc = build_static_color_masks( system.color_masks, system_size, num_colors ); + rc = build_static_color_masks( system_size, per_color_rows, system.color_masks ); if( rc != grb::SUCCESS ) { return rc; } - times[ 2 ] = timer.time(); - MASTER_PRINT( pid, " time (ms) " << times[ 2 ] << std::endl ); + times[ 3 ] = timer.time(); + MASTER_PRINT( pid, " time (ms) " << times[ 3 ] << std::endl ); return rc; } @@ -177,19 +201,17 @@ namespace grb { const size_t pid { spmd<>::pid() }; grb::utils::Timer timer; - std::array< double, 3 > times; + std::array< double, 4 > times; MASTER_PRINT( pid, "\n-- main system" << std::endl ); rc = build_base_system< DIMS, T, T, grb::algorithms::hpcg_data< T, T, T > >( *data, n, params.physical_sys_sizes, params.halo_size, - params.diag_value, params.non_diag_value, params.num_colors, times ); + params.diag_value, params.non_diag_value, times ); if( rc != grb::SUCCESS ) { MASTER_PRINT( pid, " error: " << toString( rc ) ); return rc; } MASTER_PRINT( pid, "-- main system generation time (ms) " - "[system matrix,vectors,color masks]:" - << times[ 0 ] - << "," << times[ 1 ] - << "," << times[ 2 ] << std::endl; + "(system matrix,vectors,coloring,color masks):" << times[ 0 ] << "," << times[ 1 ] + << "," << times[ 2 ] << "," << times[ 3 ] << std::endl; ); // initialize coarsening with additional pointers and dimensions copies to iterate and divide @@ -224,17 +246,15 @@ namespace grb { double coarsener_gen_time{ timer.time() }; rc = build_base_system< DIMS, T, T, grb::algorithms::multi_grid_data< T, T > >( *new_coarser, coarser_size, coarser_sizes, params.halo_size, - params.diag_value, params.non_diag_value, params.num_colors, times ); + params.diag_value, params.non_diag_value, times ); if( rc != grb::SUCCESS ) { MASTER_PRINT( pid, " error: " << toString( rc ) ); return rc; } MASTER_PRINT( pid, "-- level generation time (ms) " - "[level,coarsening matrix,system matrix,vectors,color masks]:" - << coarsening_level << "," << 
coarsener_gen_time - << "," << times[ 0 ] - << "," << times[ 1 ] - << "," << times[ 2 ] << std::endl; + "(level,coarsening matrix,system matrix,vectors,coloring,color masks):" + << coarsening_level << "," << coarsener_gen_time << "," << times[ 0 ] << "," << times[ 1 ] + << "," << times[ 2 ] << "," << times[ 3 ] << std::endl; ); // prepare for new iteration diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 8425432de..93c69d87e 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -35,10 +35,6 @@ #include #include -#include -#include - -#include #include @@ -68,6 +64,12 @@ void print_norm( const grb::Vector< T > &r, const char * head, const Ring &ring #define DBG_print_norm( vec, head ) print_norm( vec, head ) #endif +#include +#include + +#include +#include + #include #include @@ -160,9 +162,9 @@ T static next_pow_2( T n ) { * @return RC grb::SUCCESS if the system initialization within GraphBLAS succeeded */ static RC build_3d_system( std::unique_ptr< hpcg_data< double, double, double > > & holder, const system_input & in ) { - const std::array< size_t, 3 > physical_sys_sizes { in.nx, in.ny, in.nz }; struct hpcg_system_params< 3, double > params { - physical_sys_sizes, HALO_RADIUS, BAND_WIDTH_3D * 2 + 1, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 + { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, + PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; return build_hpcg_system< 3, double >( holder, params ); @@ -361,6 +363,8 @@ static void test_iters(); static void test_iters2(); #endif +void test_system_iter(); + int main( int argc, char ** argv ) { simulation_input sim_in; size_t test_outer_iterations; @@ -371,6 +375,8 @@ int main( int argc, char ** argv ) { test_iters2(); return 0; #endif + test_system_iter(); + // return 0; parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); thcout << "System size x: " << sim_in.nx << std::endl; @@ 
-490,6 +496,34 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration } +void test_system_iter() { + constexpr size_t DIMS = 2; + using row_index_t = size_t; + std::array< row_index_t, DIMS > dims; + dims.fill( 4 ); + grb::utils::geometry::linearized_halo_ndim_system< row_index_t, DIMS > system( dims, 1 ); + grb::utils::geometry::linearized_halo_ndim_system< row_index_t, DIMS >::iterator begin = system.begin(); + + while( begin.has_more_elements() ) { + std::cout << "row " << begin->get_element_linear() << ": "; + while( begin.has_more_neighbours() ) { + std::cout << /* "-- " << */ begin->get_neighbor_linear() << " "; + begin.next_neighbour(); + } + std::cout << std::endl; + begin.next_element(); + } + + std::vector< size_t > colors, counters; + color_matrix_greedy( system, colors, counters ); + + std::cout << "final assignment:" << std::endl; + for( size_t i = 0; i < colors.size(); i++ ){ + std::cout << i << " -> " << colors[ i ] << ", "; + } + std::cout << std::endl; +} + struct NZ { @@ -510,9 +544,10 @@ static void test_iters() { using clock = std::chrono::steady_clock; constexpr size_t DIMS = 3; + using coord_t = size_t; - std::array< unsigned, DIMS > finer_sizes{ 1024, 1024, 1024}; - std::array< unsigned, DIMS > coarser_sizes; + std::array< coord_t, DIMS > finer_sizes{ 1024, 1024, 1024}; + std::array< coord_t, DIMS > coarser_sizes; for( size_t i = 0; i < finer_sizes.size(); i++ ) { coarser_sizes[ i ] = finer_sizes[ i ] / 2; } @@ -528,8 +563,8 @@ static void test_iters() { grb::algorithms::old::coarsener_generator_iterator< DIMS, double > send( lcoarser_sizes, lfiner_sizes, rows ); - using citer = hpcg_coarsener_builder< DIMS, unsigned, double >::hpcg_coarsener_iterator; - hpcg_coarsener_builder< DIMS, unsigned, double > coarsener( coarser_sizes, finer_sizes ); + using citer = hpcg_coarsener_builder< DIMS, coord_t, double >::hpcg_coarsener_iterator; + hpcg_coarsener_builder< DIMS, coord_t, double > coarsener( coarser_sizes, 
finer_sizes ); citer pbegin( coarsener.make_begin_iterator() ); const citer pend( coarsener.make_end_iterator() ); @@ -595,21 +630,22 @@ static void test_iters() { static void test_iters2() { using clock = std::chrono::steady_clock; + using coord_t = size_t; constexpr size_t DIMS = 3, halo_size = 1; constexpr double diag_value = 26.0, non_diag_value = -1.0; - std::array< unsigned, DIMS > sys_sizes{ 64, 64, 64}; + std::array< coord_t, DIMS > sys_sizes{ 64, 64, 64}; size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; std::array< size_t, DIMS > large_sys_sizes{ 64, 64, 64}; old::matrix_generator_iterator< DIMS, double > sbegin( large_sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); old::matrix_generator_iterator< DIMS, double > send( large_sys_sizes, n, halo_size, diag_value, non_diag_value ); - hpcg_builder< DIMS, unsigned, double > hpcg_system( sys_sizes, halo_size ); - matrix_generator_iterator< DIMS, unsigned, double > pbegin( + hpcg_builder< DIMS, coord_t, double > hpcg_system( sys_sizes, halo_size ); + matrix_generator_iterator< DIMS, coord_t, double > pbegin( hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); - matrix_generator_iterator< DIMS, unsigned, double > pend( + matrix_generator_iterator< DIMS, coord_t, double > pend( hpcg_system.make_end_iterator( diag_value, non_diag_value ) ); From f93194360c32c9dddbf8c11434cce742c82c47e0 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 18 Nov 2022 14:48:05 +0100 Subject: [PATCH 07/28] re-organizing the code with dedicated runners for the various algorithmical components of HPCG - splitting facilities to build the data for multi-grid, smoother and coarsener renaming classes in geometry to abide by naming conventions - better splitting of hpcg data structures - splitting out MG options into dedicated data structure - splitting out options and outputs of HPCG making kernels of HPCG simulation composable by the user - documenting classes 
for MG moving IteratorValueAdaptor to iterator folder - moving MG data structures to the files using them --- .../graphblas/algorithms/hpcg/coloring.hpp | 23 +- include/graphblas/algorithms/hpcg/hpcg.hpp | 254 +------ .../graphblas/algorithms/hpcg/hpcg_data.hpp | 227 ------ .../algorithms/hpcg/matrix_building_utils.hpp | 310 -------- .../algorithms/hpcg/multigrid_v_cycle.hpp | 252 ------ .../algorithms/hpcg/ndim_matrix_builders.hpp | 322 ++------ .../hpcg/old_matrix_building_utils.hpp | 173 ----- .../hpcg/old_ndim_matrix_builders.hpp | 562 -------------- .../algorithms/hpcg/system_building_utils.hpp | 443 ++++++----- .../algorithms/multigrid/coarsener.hpp | 197 +++++ .../multigrid/multigrid_building_utils.hpp | 56 ++ .../algorithms/multigrid/multigrid_cg.hpp | 360 +++++++++ .../algorithms/multigrid/multigrid_data.hpp | 105 +++ .../multigrid/multigrid_v_cycle.hpp | 237 ++++++ .../red_black_gauss_seidel.hpp | 127 +++- .../utils/geometry/array_vector_storage.hpp | 137 ++-- .../utils/geometry/dynamic_vector_storage.hpp | 154 ++++ .../utils/geometry/generic_vector_storage.hpp | 117 --- .../halo_matrix_generator_iterator.hpp | 207 +++++ .../linearized_halo_ndim_geometry.hpp | 423 ++++++----- .../linearized_halo_ndim_iterator.hpp | 718 +++++++++--------- .../geometry/linearized_halo_ndim_system.hpp | 185 ++--- .../geometry/linearized_ndim_iterator.hpp | 335 ++++---- .../utils/geometry/linearized_ndim_system.hpp | 342 +++++---- .../graphblas/utils/geometry/ndim_system.hpp | 164 ++-- .../graphblas/utils/geometry/ndim_vector.hpp | 228 +++--- .../utils/iterators/IteratorValueAdaptor.hpp | 128 ++++ .../utils/iterators/partition_range.hpp | 71 ++ tests/smoke/hpcg.cpp | 475 +++++------- 29 files changed, 3545 insertions(+), 3787 deletions(-) delete mode 100644 include/graphblas/algorithms/hpcg/hpcg_data.hpp delete mode 100644 include/graphblas/algorithms/hpcg/matrix_building_utils.hpp delete mode 100644 include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp delete mode 100644 
include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp delete mode 100644 include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp create mode 100644 include/graphblas/algorithms/multigrid/coarsener.hpp create mode 100644 include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp create mode 100644 include/graphblas/algorithms/multigrid/multigrid_cg.hpp create mode 100644 include/graphblas/algorithms/multigrid/multigrid_data.hpp create mode 100644 include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp rename include/graphblas/algorithms/{hpcg => multigrid}/red_black_gauss_seidel.hpp (57%) create mode 100644 include/graphblas/utils/geometry/dynamic_vector_storage.hpp delete mode 100644 include/graphblas/utils/geometry/generic_vector_storage.hpp create mode 100644 include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp create mode 100644 include/graphblas/utils/iterators/IteratorValueAdaptor.hpp create mode 100644 include/graphblas/utils/iterators/partition_range.hpp diff --git a/include/graphblas/algorithms/hpcg/coloring.hpp b/include/graphblas/algorithms/hpcg/coloring.hpp index 1e6378c59..f9334afb3 100644 --- a/include/graphblas/algorithms/hpcg/coloring.hpp +++ b/include/graphblas/algorithms/hpcg/coloring.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,13 +27,12 @@ namespace grb { namespace algorithms { - /** * Coloring algorithm for matrix generated by a \p DIMS - dimensional system. * * This function implements a < b>greedy heuristics< /b> to color the rows of a matrix generated by - * a \p DIMS - dimensional generator \p system, so that no two connected elements < em>(i,j)< /em> - * in the system (corresponding to a nonzero < em>(i,j)< /em> entry in the matrix) have the same color. 
+ * a \p DIMS - dimensional generator \p system, so that no two connected elements \a i,j + * in the system (corresponding to a nonzero \a (i,j) entry in the matrix) have the same color. * If \p reorder_rows_per_color is false (as per default), the coloring information is stored into * \p row_colors, while \p color_counters stores the number of rows for each color. * @@ -44,22 +43,27 @@ namespace grb { * * In both cases, \a color_counters.size() gives the number of found colors. * + * This algorithm performs a \a global coloring of the input system, i.e. it must run on the entire system + * \a before any partitioning occurs. Although this is not scalable, it should not be a problem for + * most sizes, as the constants in front of this algorithms are very small. Implementing a distributed + * coloring algorithm is anyway out of the scope of this prototype. + * * @tparam DIMS dimensions of the system * @tparam CoordType type of the coordinates * @param[in] system generator for an \p DIMS - dimesional system with halo * @param[out] row_colors if \p reorder_rows_per_color is false, stores the color of each row; * if \p reorder_rows_per_color is true, stores the new position of each row, so that rows - * of the same color are gourped together; the initial content of the vector is destroyed + * of the same color are grouped together; the initial content of the vector is destroyed * @param[out] color_counters if \p reorder_rows_per_color is false, stores the number of rows per color; * if \p reorder_rows_per_color is true, stores at each position \a i the offset in \p color_counters * where the (clustered) rows of color \a i start from; the initial content of the vector is destroyed * @param[in] reorder_rows_per_color whether to do the clustering after the coloring */ template< - std::size_t DIMS, + size_t DIMS, typename CoordType > void color_matrix_greedy( - const grb::utils::geometry::linearized_halo_ndim_system< CoordType, DIMS > &system, + const 
grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS > &system, std::vector< CoordType > &row_colors, std::vector< CoordType > &color_counters, bool reorder_rows_per_color = false @@ -74,7 +78,7 @@ namespace grb { row_colors[0] = 0; // first point gets color 0 // Finds colors in a greedy (a likely non-optimal) fashion. - typename grb::utils::geometry::linearized_halo_ndim_system< CoordType, DIMS >::iterator begin = system.begin(); + typename grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS >::Iterator begin = system.begin(); begin.next_element(); // skip first row while( begin.has_more_elements() ) { @@ -159,7 +163,4 @@ namespace grb { } // namespace algorithms } // namespace grb - - #endif // _H_GRB_ALGORITHMS_HPCG_COLORING - diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index 492eb038d..2d30584fe 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,229 +15,57 @@ * limitations under the License. */ -/** - * @file hpcg.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief File with the main routine to run a full HPCG simulation, comprising multi-grid runs - * with Red-Black Gauss-Seidel smoothing. - * @date 2021-04-30 - */ - #ifndef _H_GRB_ALGORITHMS_HPCG #define _H_GRB_ALGORITHMS_HPCG -#include - -#include "hpcg_data.hpp" -#include "multigrid_v_cycle.hpp" - -#include +#include +#include +#include +#include +#include namespace grb { namespace algorithms { - /** - * @brief High-Performance Conjugate Gradient algorithm implementation running entirely on GraphBLAS. - * - * Finds the solution x of an \f$ A x = b \f$ algebraic system by running the HPCG algorithm. 
- * The implementation here closely follows the reference HPCG benchmark used for the HPCG500 rank, - * visible at https://github.com/hpcg-benchmark/hpcg. - * The only difference is the usage of a Red-Black Gauss-Seidel smoother instead of the standard one - * for performance reasons, as the standard Gauss-Seidel algorithm is inherently sequential and not - * expressible in terms of standard linear algebra operations. - * In particular, this implementation (as the standard one) couples a standard CG algorithm with a V-cycle - * multi-grid solver to initially refine the tentative solution. This refinement step depends on the - * availability of coarsening information, which should be stored inside \p data; otherwise, - * the refinement is not performed and only the CG algorithm is run. For more information on inputs - * and on coarsening information, you may consult the \ref hpcg_data class documentation. - * - * This implementation assumes that the vectors and matrices inside \p data are all correctly initialized - * and populated with the proper values; in particular - * - hpcg_data#x with the initial tentative solution (iterative solutions are also stored here) - * - hpcg_data#A with the system matrix - * - hpcg_data#b with the right-hand side vector \f$ b \f$ - * - hpcg_data#A_diagonal with the diagonal values of the matrix - * - hpcg_data#color_masks with the color masks for this level - * - hpcg_data#coarser_level with the information for the coarser multi-grid run (if any) - * The other vectors are assumed to be inizialized (via the usual grb::Vector#Vector(size_t) constructor) - * but not necessarily populated with values, as they are internally populated when needed; hence, - * any previous values are overwritten. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam ResidualType type of the residual norm - * @tparam NonzeroType type of matrix values - * @tparam InputType type of values of the right-hand side vector b - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in,out] data \ref hpcg_data object storing inputs, outputs and temporary vectors used for the computation, - * as long as the information for the recursive multi-grid runs - * @param[in] with_preconditioning whether to use pre-conditioning, i.e. to perform multi-grid runs - * @param[in] presmoother_steps number of pre-smoother steps, for multi-grid runs - * @param[in] postsmoother_steps nomber of post-smoother steps, for multi-grid runs - * @param[in] max_iterations maximum number if iterations the simulation may run for; once reached, - * the simulation stops even if the residual norm is above \p tolerance - * @param[in] tolerance the tolerance over the residual norm, i.e. 
the value of the residual norm to stop - * the simulation at - * @param[out] iterations numbers of iterations performed - * @param[out] norm_residual norm of the final residual - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, + // simply "assemble" types + template< + typename IOType, typename ResidualType, typename NonzeroType, typename InputType, - class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, - class Minus = operators::subtract< IOType > > - grb::RC hpcg( hpcg_data< IOType, NonzeroType, InputType > &data, - bool with_preconditioning, - const size_t presmoother_steps, - const size_t postsmoother_steps, - const size_t max_iterations, - const ResidualType tolerance, - size_t &iterations, - ResidualType &norm_residual, - bool print_iter_stats, - const Ring &ring = Ring(), - const Minus &minus = Minus() - ) { - ResidualType alpha; - - const grb::Matrix< NonzeroType > &A { data.A }; - grb::Vector< IOType > &x { data.x }; - const grb::Vector< InputType > &b { data.b }; - grb::Vector< IOType > &r { data.r }; // residual vector - grb::Vector< IOType > &p { data.p }; // direction vector - grb::Vector< IOType > &Ap { data.u }; // temp vector - grb::Vector< IOType > &z { data.z }; // pre-conditioned residual vector - grb::RC ret { SUCCESS }; - - ret = ret ? ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::set( r, 0 ); - ret = ret ? ret : grb::set( p, 0 ); - - ret = ret ? ret : grb::set( p, x ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, ring ); // Ap = A * x - assert( ret == SUCCESS ); - - ret = ret ? 
ret : grb::eWiseApply( r, b, Ap, minus ); // r = b - Ap; - assert( ret == SUCCESS ); - - norm_residual = ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( norm_residual, r, r, ring ); // norm_residual = r' * r; - assert( ret == SUCCESS ); - - // compute sqrt to avoid underflow - norm_residual = std::sqrt( norm_residual ); - - // initial norm of residual - const ResidualType norm_residual_initial { norm_residual }; - ResidualType old_r_dot_z { 0.0 }, r_dot_z { 0.0 }, beta { 0.0 }; - size_t iter { 0 }; - - grb::utils::Timer timer; - -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "start p" ); - DBG_print_norm( Ap, "start Ap" ); - DBG_print_norm( r, "start r" ); -#endif - - do { -#ifdef HPCG_PRINT_STEPS - DBG_println( "========= iteration " << iter << " =========" ); -#endif - if( with_preconditioning ) { - if( print_iter_stats ) { - timer.reset(); - } - ret = ret ? ret : internal::multi_grid( data, data.coarser_level, - presmoother_steps, postsmoother_steps, ring, minus ); - assert( ret == SUCCESS ); - if( print_iter_stats ) { - double duration = timer.time(); - std::cout << "iteration, pre-conditioner: " << iter << "," - << duration << std::endl; - } - } else { - ret = ret ? ret : grb::set( z, r ); // z = r; - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( z, "initial z" ); -#endif - - ResidualType pAp; - - if( iter == 0 ) { - ret = ret ? ret : grb::set( p, z ); // p = z; - assert( ret == SUCCESS ); - - ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; - assert( ret == SUCCESS ); - } else { - old_r_dot_z = r_dot_z; - - r_dot_z = ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( r_dot_z, r, z, ring ); // r_dot_z = r' * z; - assert( ret == SUCCESS ); - - beta = r_dot_z / old_r_dot_z; - ret = ret ? ret : grb::clear( Ap ); // Ap = 0; - ret = ret ? 
ret : grb::eWiseMulAdd( Ap, beta, p, z, ring ); // Ap += beta * p + z; - std::swap( Ap, p ); // p = Ap; - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "middle p" ); -#endif - - ret = ret ? ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, ring ); // Ap = A * p; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( Ap, "middle Ap" ); -#endif - pAp = static_cast< ResidualType >( 0.0 ); - ret = ret ? ret : grb::dot( pAp, Ap, p, ring ); // pAp = p' * Ap - assert( ret == SUCCESS ); - - alpha = r_dot_z / pAp; - - ret = ret ? ret : grb::eWiseMul( x, alpha, p, ring ); // x += alpha * p; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( x, "end x" ); -#endif - - ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, ring ); // r += - alpha * Ap; - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( r, "end r" ); -#endif - - norm_residual = static_cast< ResidualType >( 0.0 ); - ret = ret ? 
ret : grb::dot( norm_residual, r, r, ring ); // residual = r' * r; - assert( ret == SUCCESS ); - - norm_residual = std::sqrt( norm_residual ); - - if( print_iter_stats ) { - std::cout << "iteration, residual: " << iter << "," << norm_residual << std::endl; - } - - ++iter; - } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); - - iterations = iter; - return ret; + class Ring, + class Minus + > using HPCGRunnerType = mg_cg_runner< IOType, NonzeroType, InputType, ResidualType, + multigrid_runner< IOType, NonzeroType, InputType, + red_black_smoother_runner< IOType, NonzeroType, Ring >, + single_point_coarsener< IOType, NonzeroType, Ring, Minus >, + Ring, Minus >, + Ring, Minus + >; + + template< + typename IOType, + typename ResidualType, + typename NonzeroType, + typename InputType, + class Ring, + class Minus + > HPCGRunnerType< IOType, ResidualType, NonzeroType, InputType, Ring, Minus > + build_hpcg_runner( size_t smoother_steps ) { + + single_point_coarsener< IOType, NonzeroType, Ring, Minus > coarsener; + red_black_smoother_runner< IOType, NonzeroType, Ring > + smoother{ smoother_steps, smoother_steps, 1UL, {}, Ring() }; + + multigrid_runner< IOType, NonzeroType, InputType, + red_black_smoother_runner< IOType, NonzeroType, Ring >, + single_point_coarsener< IOType, NonzeroType, Ring, Minus >, + Ring, Minus + > mg_runner( std::move( smoother ), std::move( coarsener ) ); + + return HPCGRunnerType< IOType, ResidualType, NonzeroType, InputType, Ring, Minus >( + std::move( mg_runner ) ); } } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/hpcg_data.hpp b/include/graphblas/algorithms/hpcg/hpcg_data.hpp deleted file mode 100644 index c53ef99e4..000000000 --- a/include/graphblas/algorithms/hpcg/hpcg_data.hpp +++ /dev/null @@ -1,227 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg_data.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Data structures to store HPCG input/output data. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_HPCG_DATA -#define _H_GRB_ALGORITHMS_HPCG_DATA - -#include -#include - -#include - - -namespace grb { - - namespace algorithms { - - /** - * @brief basic data container for the HPCG algorithm, storing \b only the - * data in common between the full CG run and the V-cycle multi-grid solver. - * Additional data are stored in inheriting daata structures. - * - * @tparam IOType type of values of the vectors for intermediate results - * @tparam NonzeroType type of the values stored inside the system matrix #A - */ - template< - typename IOType, - typename NonzeroType - > - struct system_data { - - const std::size_t system_size; ///< size of the system, i.e. side of the #A - - grb::Matrix< NonzeroType > A; ///< system matrix - grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A - grb::Vector< IOType > z; ///< multi-grid solution - grb::Vector< IOType > r; ///< residual - grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results - std::vector< grb::Vector< bool > > color_masks; ///< for color masks - - /** - * @brief Constructor building all the stored vectors and matrices. 
- * - * Stored vectors and matrices are constructed according to \p sys_size but \b not initialized - * to any value internally, as initialization is up to users's code. - * - * @param[in] sys_size the size of the underlying physical system, i.e. the size of vectors and the number - * of rows and columns of the #A matrix. - */ - system_data( std::size_t sys_size ) : - system_size( sys_size ), - A( sys_size, sys_size ), - A_diagonal( sys_size ), - z( sys_size ), - r( sys_size ), - smoother_temp( sys_size ) { } - - // for safety, disable copy semantics - system_data( const system_data & o ) = delete; - - system_data & operator=( const system_data & ) = delete; - - grb::RC zero_temp_vectors() { - grb::RC rc = grb::set( z, 0 ); - rc = rc ? rc : grb::set( r, 0 ); - rc = rc ? rc : grb::set( smoother_temp, 0 ); - return rc; - } - }; - - /** - * @brief Data container for all multi-grid inputs and outputs. - * - * @tparam IOType Type of values of the vectors for intermediate results - * @tparam NonzeroType Type of the values stored inside the system matrix \p A - * and the coarsening matrix #Ax_finer - * - * This data structure stores information for a full multi-grid V cycle, i.e. - * - input and output vectors for solution, residual and temporary vectors - * - coarsening information, in particular the #coarsening_matrix that - * coarsens a larger system of size #finer_size to the current system - * of size #system_size - * - the next level of coarsening, pointed to by #coarser_level, possibly being \c nullptr - * if no further coarsening is desired; note that this information is automatically - * destructed on object destruction (if any) - * - * Vectors stored here refer to the \b coarsened system (with the exception of #Ax_finer), - * thus having size #system_size; this also holds for the system matrix #A, - * while #coarsening_matrix has size #system_size \f$ \times \f$ #finer_size. 
- * Hence, the typical usage of this data structure is to coarsen \b external vectors, e.g. vectors - * coming from another \code multi_grid_data \endcode object whose #system_size equals - * \code this-> \endcode #fines_size, via \code this-> \endcode #coarsening_matrix and store the coarsened - * vectors internally. Mimicing the recursive behavior of standard multi-grid simulations, - * the information for a further coarsening is stored inside #coarser_level, so that the - * hierarchy of coarsened levels is reflected inside this data structure. - * - * As for \ref system_data, internal vectors and matrices are initialized to the proper size, - * but their values are \b not initialized. - */ - template< - typename IOType, - typename NonzeroType - > - struct multi_grid_data : public system_data< IOType, NonzeroType > { - - const std::size_t finer_size; ///< ssize of the finer system to coarse from; - ///< typically \c finer_size \code == 8 * \endcode #system_size - - grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size - - grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size - ///< to coarsen an input vector of size #finer_size into a vector of size #system_size - - struct multi_grid_data< IOType, NonzeroType > * coarser_level; ///< pointer to next coarsening level, for recursive - ///< multi-grid V cycle implementations - - /** - * @brief Construct a new \c multi_grid_data_object by initializing internal data structures and setting - * #coarser_level to \c nullptr. - * @param[in] coarser_size size of the current system, i.e. size \b after coarsening - * @param[in] _finer_size size of the finer system, i.e. 
size of external objects \b before coarsening - */ - multi_grid_data( std::size_t coarser_size, std::size_t _finer_size ) : - system_data< IOType, NonzeroType >( coarser_size ), - finer_size( _finer_size ), - Ax_finer( finer_size ), - coarsening_matrix( coarser_size, finer_size ) { - coarser_level = nullptr; - } - - /** - * @brief Destroys the \c multi_grid_data_object object by destroying #coarser_level. - */ - virtual ~multi_grid_data() { - if( coarser_level != nullptr ) { - delete coarser_level; - } - } - - grb::RC zero_temp_vectors() { - grb::RC rc = this->system_data< IOType, NonzeroType >::zero_temp_vectors(); - rc = rc ? rc : grb::set( Ax_finer, 0 ); - return rc; - } - }; - - /** - * @brief Data stucture to store the data for a full HPCG run: system vectors and matrix, - * coarsening information and temporary vectors. - * - * This data structures contains all the needed vectors and matrices to solve a linear system - * \f$ A x = b \f$. As for \ref system_data, internal elements are built and their sizes properly initialized - * to #system_size, but internal values are \b not initialized, as they are left to user's logic. - * Similarly, the coarsening information in #coarser_level is to be initialized by users by properly - * building a \code multi_grid_data \endcode object and storing its pointer into - * #coarser_level; on destruction, #coarser_level will also be properly destroyed without - * user's intervention. 
- * - * @tparam IOType type of values of the vectors for intermediate results - * @tparam NonzeroType type of the values stored inside the system matrix #A - * @tparam InputType type of the values of the right-hand side vector #b - */ - template< typename IOType, typename NonzeroType, typename InputType > - struct hpcg_data : public system_data< IOType, NonzeroType > { - - grb::Vector< InputType > b; ///< right-side vector of known values - grb::Vector< IOType > u; ///< temporary vectors (typically for CG exploration directions) - grb::Vector< IOType > p; ///< temporary vector (typically for x refinements coming from the multi-grid run) - grb::Vector< IOType > x; // system solution being refined over the iterations: it us up to the user - ///< to set the initial solution value - - struct multi_grid_data< IOType, NonzeroType > * coarser_level; ///< information about the coarser system, for - ///< the multi-grid run - - /** - * @brief Construct a new \c hpcg_data object by building vectors and matrices and by setting - * #coarser_level to \c nullptr (i.e. no coarser level is assumed). - * - * @param[in] sys_size the size of the simulated system, i.e. of all the internal vectors and matrices - */ - hpcg_data( std::size_t sys_size ) : system_data< IOType, NonzeroType >( sys_size ), b( sys_size ), u( sys_size ), p( sys_size ), x( sys_size ) { - coarser_level = nullptr; - } - - /** - * @brief Destroy the \c hpcg_data object by destroying the #coarser_level informartion, if any. - */ - virtual ~hpcg_data() { - if( coarser_level != nullptr ) { - delete coarser_level; - } - } - - grb::RC zero_temp_vectors() { - grb::RC rc = this->system_data< IOType, NonzeroType >::zero_temp_vectors(); - rc = rc ? rc : grb::set( u, 0 ); - rc = rc ? 
rc : grb::set( p, 0 ); - return rc; - } - }; - - } // namespace algorithms - -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_HPCG_DATA - diff --git a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp deleted file mode 100644 index 45486e99b..000000000 --- a/include/graphblas/algorithms/hpcg/matrix_building_utils.hpp +++ /dev/null @@ -1,310 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg_matrix_building_utils.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build the matrices for HPCG simulations in an arbitrary number of dimensions. 
- * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS -#define _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "ndim_matrix_builders.hpp" - - -namespace grb { - namespace algorithms { - - template< typename T > void partition_nonzeroes( - T num_nonzeroes, - T& first_offset, - T& last_offset - ) { - const size_t num_procs{ spmd<>::nprocs() }; - const T per_process{ ( num_nonzeroes + num_procs - 1 ) / num_procs }; // round up - first_offset = std::min( per_process * static_cast< T >( spmd<>::pid() ), num_nonzeroes ); - last_offset = std::min( first_offset + per_process, num_nonzeroes ); - } - - template< typename IterT > void partition_iteration_range( - size_t num_nonzeroes, - IterT &begin, - IterT &end - ) { - static_assert( std::is_base_of< std::random_access_iterator_tag, - typename std::iterator_traits< IterT >::iterator_category >::value, - "the given iterator is not a random access one" ); - assert( num_nonzeroes == static_cast< size_t >( end - begin ) ); - size_t first, last; - partition_nonzeroes( num_nonzeroes, first, last ); - if( last < num_nonzeroes ) { - end = begin; - end += last; - } - begin += first; - } - - /** - * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. - * - * This routine initializes \p M to a matrix representing a \p DIMS -dimensions system of sizes - * \p sys_sizes, with an iteration halo of size \p halo_size . The matrix diagonal values are initialized - * to \p diag_value while the other non-zero values are initialized to \p non_diag_value . 
- * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed - * @param sys_sizes the sizes of the physical system - * @param halo_size the size of the halo of point to iterate in - * @param diag_value diagonal value - * @param non_diag_value value outside of the diagonal - * @return grb::RC the success value returned when trying to build the matrix - */ - template< - std::size_t DIMS, - typename coord_t, - typename T, - enum grb::Backend B - > grb::RC build_ndims_system_matrix( - grb::Matrix< T, B > & M, - const grb::algorithms::hpcg_builder< DIMS, coord_t, T > & hpcg_system, - T diag_value, - T non_diag_value - ) { - if( hpcg_system.system_size() > std::numeric_limits< coord_t >::max() ) { - throw std::domain_error( "CoordT cannot store the matrix coordinates" ); - } - /* - std::array< coord_t, DIMS > _sys_sizes; - for( size_t i = 0; i < DIMS; i++ ) _sys_sizes[i] = sys_sizes[i]; - grb::algorithms::hpcg_builder< DIMS, coord_t, T > hpcg_system( _sys_sizes, halo_size ); - */ - grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > begin( - hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); - grb::algorithms::matrix_generator_iterator< DIMS, coord_t, T > end( - hpcg_system.make_end_iterator( diag_value, non_diag_value ) - ); - partition_iteration_range( hpcg_system.num_neighbors(), begin, end ); - - return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); - } - - /** - * @brief Builds a coarsener matrix for an HPCG simulation. - * - * It initializes \p M as a rectangular matrix, with rows corresponding to the coarser system - * (of dimensions \p coarser_sizes - output) and columns corresponding to the finer system - * (of dimensions \p finer_sizes - input). 
The resulting coarsening matrix takes in input the finer system - * and coarsens it by keeping one element every \a S , where \a S is the ratio between the finer and - * the coarser dimension (computed for each dimension). In this way each \p DIMS -dimensional finer element - * corresponds to its bounding coarser element. - * - * For the coarsening to be feasible, the sizes of the finer system \b must be a multiple of those of the - * coarser system. If this condition is not met, an exception is thrown. - * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed with proper dimensions - * @param coarser_sizes sizes of the coarser system - * @param finer_sizes sizes of the finer system; each one \b must be a multiple of the corresponding value - * in \p coarser_size , otherwise an exception is thrown - * @return grb::RC the success value returned when trying to build the matrix - */ - template< - std::size_t DIMS, - typename T, - enum grb::Backend B - > grb::RC build_ndims_coarsener_matrix( - grb::Matrix< T, B > & M, - const std::array< std::size_t, DIMS > & coarser_sizes, - const std::array< std::size_t, DIMS > & finer_sizes - ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; - for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { - std::size_t step = finer_sizes[ i ] / coarser_sizes[ i ]; - if( step * coarser_sizes[ i ] != finer_sizes[ i ] ) { - throw std::invalid_argument( "finer sizes should be a multiple of " - "coarser sizes" ); - } - } - std::size_t const cols { std::accumulate( finer_sizes.cbegin(), finer_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be 
rectangular" - " with rows == " - "and cols == " ); - } - using coord_t = unsigned; - if( rows > std::numeric_limits< coord_t >::max() ) { - throw std::domain_error( "CoordT cannot store the row coordinates" ); - } - if( cols > std::numeric_limits< coord_t >::max() ) { - throw std::domain_error( "CoordT cannot store the column coordinates" ); - } - std::array< coord_t, DIMS > _coarser_sizes, _finer_sizes; - for( size_t i = 0; i < DIMS; i++ ) { - _coarser_sizes[i] = coarser_sizes[i]; - _finer_sizes[i] = finer_sizes[i]; - } - grb::algorithms::hpcg_coarsener_builder< DIMS, coord_t, T > coarsener( _coarser_sizes, _finer_sizes ); - grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, T > begin( coarsener.make_begin_iterator() ); - grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, T > end( - coarsener.make_end_iterator() - ); - partition_iteration_range( coarsener.system_size(), begin, end ); - return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); - } - - template< typename CoordT > - struct true_iter { - - static const bool __TRUE = true; - - using self_t = true_iter< CoordT >; - using iterator_category = std::random_access_iterator_tag; - using value_type = bool; - using pointer = const bool *; - using reference = const bool&; - using difference_type = long; - - true_iter() = delete; - - true_iter( CoordT first ): index( first ) {} - - true_iter( const self_t & ) = default; - - self_t & operator=( const self_t & ) = default; - - bool operator!=( const self_t & other ) const { - return this->index != other.index; - } - - self_t & operator++() noexcept { - (void) index++; - return *this; - } - - self_t & operator+=( size_t increment ) noexcept { - index += increment; - return *this; - } - - difference_type operator-( const self_t & other ) noexcept { - return static_cast< difference_type >( this->index - other.index ); - } - - pointer operator->() const { - return &__TRUE; - } - - reference operator*() const { - return 
*(this->operator->()); - } - - private: - CoordT index; - }; - - template< typename CoordT > const bool true_iter< CoordT >::__TRUE; - - /** - * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . - * - * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array - * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, - * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, - * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. - * - * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated - * only with the \c true values, leading to sparse vectors. This saves on storage space and allows - * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. - * - * @tparam B GraphBLAS backend for the vector - * @param masks output vector of color masks - * @param matrix_size size of the system matrix - * @param colors numbers of colors masks to build; it must be < \p matrix_size - * @return grb::RC the success value returned when trying to build the vector - */ - template< enum grb::Backend B > - grb::RC build_static_color_masks( - std::size_t matrix_size, - const std::vector< std::vector< size_t > > &per_color_rows, - std::vector< grb::Vector< bool, B > > & masks - ) { - if( ! 
masks.empty() ) { - throw std::invalid_argument( "vector of masks is expected to be empty" ); - } - for( size_t i = 0; i < per_color_rows.size(); i++ ) { - const std::vector< size_t > & rows = per_color_rows[ i ]; - /* - { - std::cout << "\ncolor " << i << std::endl; - for( size_t row : rows ) { - std::cout << row << " "; - } - std::cout << std::endl; - } - */ - masks.emplace_back( matrix_size ); - grb::Vector< bool > & output_mask = masks.back(); - std::vector< size_t >::const_iterator begin = rows.cbegin(); - std::vector< size_t >::const_iterator end = rows.cend(); - // partition_iteration_range( rows.size(), begin, end ); - grb::RC rc = grb::buildVectorUnique( output_mask, begin , end, true_iter< size_t >( 0 ), - true_iter< size_t >( std::distance( begin, end ) ), IOMode::SEQUENTIAL ); - if( rc != SUCCESS ) { - std::cerr << "error while creating output mask for color " << i << ": " - << toString( rc ) << std::endl; - return rc; - } - /* - { - std::cout << "mask color " << i << std::endl; - size_t count = 0; - for( const auto & v : output_mask ) { - std::cout << v.first << " "; - count++; - if( count > 20 ) break; - } - std::cout << std::endl; - } - */ - } - return grb::SUCCESS; - } - - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS diff --git a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp b/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp deleted file mode 100644 index 7541a387f..000000000 --- a/include/graphblas/algorithms/hpcg/multigrid_v_cycle.hpp +++ /dev/null @@ -1,252 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file multigrid_v_cycle.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief This file contains the routines for multi-grid solution refinement, including the main routine - * and those for coarsening and refinement of the tentative solution. - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE -#define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE - -#include -#include - -#include - -#include "hpcg_data.hpp" -#include "red_black_gauss_seidel.hpp" - - -namespace grb { - namespace algorithms { - /** - * @brief Namespace for interfaces that should not be used outside of the algorithm namespace. - */ - namespace internal { - - /** - * @brief computes the coarser residual vector \p coarsening_data.r by coarsening - * \p coarsening_data.Ax_finer - \p r_fine via \p coarsening_data.coarsening_matrix. - * - * The coarsening information are stored inside \p coarsening_data. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in] r_fine fine residual vector - * @param[in,out] coarsening_data \ref multi_grid_data data structure storing the information for coarsening - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, - typename NonzeroType, - class Ring, - class Minus > - grb::RC compute_coarsening( const grb::Vector< IOType > & r_fine, // fine residual - struct multi_grid_data< IOType, NonzeroType > & coarsening_data, - const Ring & ring, - const Minus & minus ) { - RC ret { SUCCESS }; - ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, coarsening_data.Ax_finer, - minus ); // Ax_finer = r_fine - Ax_finer - assert( ret == SUCCESS ); - - // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 - // to *coarsening_data->system_size - ret = ret ? ret : grb::set( coarsening_data.r, 0 ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( coarsening_data.r, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, - ring ); // r = coarsening_matrix * Ax_finer - return ret; - } - - /** - * @brief computes the prolongation of the coarser solution \p coarsening_data.z and stores it into - * \p x_fine. - * - * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[out] x_fine the solution vector to store the prolonged solution into - * @param[in,out] coarsening_data information for coarsening - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, - typename NonzeroType, - class Ring > - grb::RC compute_prolongation( grb::Vector< IOType > & x_fine, // fine residual - struct multi_grid_data< IOType, NonzeroType > & coarsening_data, - const Ring & ring ) { - RC ret { SUCCESS }; - // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 - // to nrows(x_fine) - ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); - - ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix | grb::descriptors::dense >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, coarsening_data.z, ring ); - assert( ret == SUCCESS ); - - ret = ret ? ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; - assert( ret == SUCCESS ); - return ret; - } - - /** - * @brief Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, with inputs and outputs stored - * inside \p data. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[in,out] data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, - * residual, system matrix colors, temporary vectors - * @param[in] smoother_steps how many smoothing steps to run - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC run_smoother( system_data< IOType, NonzeroType > & data, const std::size_t smoother_steps, const Ring & ring ) { - RC ret { SUCCESS }; - - for( std::size_t i { 0 }; i < smoother_steps && ret == SUCCESS; i++ ) { - ret = ret ? ret : red_black_gauss_seidel( data, ring ); - assert( ret == SUCCESS ); - } - return ret; - } - - /** - * @brief Multi-grid V cycle implementation to refine a given solution. - * - * A full multi-grid run goes through the following steps: - * -# if \p presmoother_steps \f$ > 0 \f$, \p presmoother_steps of the Red-Black Gauss-Seidel smoother are run - * to improve on the initial solution stored into \p data.z - * -# the coarsening of \f$ r - A*z \f$ is computed to find the coarser residual vector - * -# a multi-grid run is recursively performed on the coarser system - * -# the tentative solution from the coarser multi-grid run is prolonged and added to the current tentative solution - * into \p data.z - * -# this solution is further smoothed for \p postsmoother_steps steps - * - * If coarsening information is not available, the multi-grid run consists in a single smmothing run. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param[in,out] data \ref multi_grid_data object storing the relevant data for the multi-grid run of the current - * clevel - * @param[in,out] coarsening_data pointer to information for the coarsening/refinement operations and for the - * recursive multi-grid run on the coarsened system; if \c nullptr, no coarsening/refinement occurs - * and only smoothing occurs on the current solution - * @param[in] presmoother_steps number of pre-smoother steps - * @param[in] postsmoother_steps number of post-smoother steps - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template< typename IOType, typename NonzeroType, class Ring, class Minus > - grb::RC multi_grid( system_data< IOType, NonzeroType > & data, - struct multi_grid_data< IOType, NonzeroType > * const coarsening_data, - const size_t presmoother_steps, - const size_t postsmoother_steps, - const Ring & ring, - const Minus & minus ) { - RC ret { SUCCESS }; -#ifdef HPCG_PRINT_STEPS - DBG_println( "mg BEGINNING {" ); -#endif - - // clean destination vector - ret = ret ? ret : grb::set( data.z, 0 ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.r, "initial r" ); -#endif - if( coarsening_data == nullptr ) { - // compute one round of Gauss Seidel and return - ret = ret ? 
ret : run_smoother( data, 1, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "smoothed z" ); - DBG_println( "} mg END" ); -#endif - return ret; - } - - struct multi_grid_data< IOType, NonzeroType > & cd { - *coarsening_data - }; - - // pre-smoother - ret = ret ? ret : run_smoother( data, presmoother_steps, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "pre-smoothed z" ); -#endif - - ret = ret ? ret : grb::set( cd.Ax_finer, 0 ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( cd.Ax_finer, data.A, data.z, ring ); - assert( ret == SUCCESS ); - - ret = ret ? ret : compute_coarsening( data.r, cd, ring, minus ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( cd.r, "coarse r" ); -#endif - - ret = ret ? ret : multi_grid( cd, cd.coarser_level, presmoother_steps, postsmoother_steps, ring, minus ); - assert( ret == SUCCESS ); - - ret = ret ? ret : compute_prolongation( data.z, cd, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "prolonged z" ); -#endif - - // post-smoother - ret = ret ? 
ret : run_smoother( data, postsmoother_steps, ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( data.z, "post-smoothed z" ); - DBG_println( "} mg END" ); -#endif - - return ret; - } - - } // namespace internal - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp index 35a15238d..5958ecb0d 100644 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp @@ -47,234 +47,52 @@ #include #include -#include +#include -#include -#include -#include namespace grb { - namespace algorithms { template< size_t DIMS, - typename CoordT, - typename T + typename CoordType, + typename ValueType > - class hpcg_builder; - - template< - size_t DIMS, - typename CoordT, - typename T - > - struct matrix_generator_iterator { - - using RowIndexType = CoordT; ///< numeric type of rows - using ColumnIndexType = CoordT; - using ValueType = T; - friend hpcg_builder< DIMS, CoordT, T >; - - using linear_system_t = grb::utils::geometry::linearized_halo_ndim_system< RowIndexType, DIMS >; - using __iter_t = typename linear_system_t::iterator; - using self_t = matrix_generator_iterator< DIMS, CoordT, T >; + class HPCGBuilder { + public: - struct __value { + struct HPCGDiagGenerator { - friend self_t; + ValueType _diag; + ValueType _non_diag; - __value( + HPCGDiagGenerator( ValueType diag, - ValueType non_diag, - RowIndexType i, - ColumnIndexType j - ) noexcept : - diagonal_value( diag ), - non_diagonal_value( non_diag ), - _i( i ), - _j( j ) - {} + ValueType non_diag + ) : _diag( diag ), + _non_diag( non_diag ) {} - __value( const __value & ) = default; - - __value & operator=( const __value & ) = default; + HPCGDiagGenerator & operator=( const HPCGDiagGenerator & ) = default; - inline RowIndexType i() const { return _i; } - 
inline ColumnIndexType j() const { return _j; } - inline ValueType v() const { - return j() == i() ? diagonal_value : non_diagonal_value; + inline ValueType operator()( const CoordType &i, const CoordType &j ) const noexcept { + return j == i ? _diag: _non_diag; } - - private: - ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal - ValueType non_diagonal_value; ///< value to emit outside of the diagonal - RowIndexType _i; - ColumnIndexType _j; }; - // interface for std::random_access_iterator - using iterator_category = std::random_access_iterator_tag; - using value_type = __value; - using pointer = value_type; - using reference = value_type; - using difference_type = typename __iter_t::difference_type; - - matrix_generator_iterator( const self_t & ) = default; - - matrix_generator_iterator( self_t && ) = default; - - self_t & operator=( const self_t & ) = default; - - self_t & operator=( self_t && ) = default; - - /** - * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. - * - * This operator internally increments the columns coordinates until wrap-around, when it increments - * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate - * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo. - * - * @return matrix_generator_iterator& \c this object, with the updated state - */ - self_t & operator++() noexcept { - (void) ++_sys_iter; - update_coords(); - return *this; - } - - self_t & operator+=( size_t offset ) { - _sys_iter += offset; - update_coords(); - return *this; - } - - difference_type operator-( const self_t &other ) const { - return this->_sys_iter - other._sys_iter; - } - - /** - * @brief Operator to compare \c this against \p o and return whether they differ. 
- * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator!=( const self_t &o ) const { - return this->_sys_iter != o._sys_iter; - } - - /** - * @brief Operator to compare \c this against \p o and return whether they are equal. - * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator==( const self_t &o ) const { - return ! operator!=( o ); - } - - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ - reference operator*() const { - return _val; - } - - pointer operator->() const { - return &_val; - } - - /** - * @brief Returns current row. - */ - inline RowIndexType i() const { - return _val.i(); - } - - /** - * @brief Returns current column. - */ - inline ColumnIndexType j() const { - return _val.j(); - } - - /** - * @brief Returns the current matrix value. - * - * @return ValueType #diagonal_value if \code row == column \endcode (i.e. 
if \code this-> \endcode - * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise - */ - inline ValueType v() const { - return _val.v(); - } - - const __iter_t & it() const { - return this->_sys_iter; - } + using HaloSystemType = grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS >; + using Iterator = geometry::HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, HPCGDiagGenerator >; - private: - value_type _val; - const linear_system_t *_lin_system; - __iter_t _sys_iter; - - /** - * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row - * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. - * - * @param sizes array with the sizes along the dimensions - * @param _halo halo of points to iterate around; must be > 0 - * @param diag value to emit when on the diagonal - * @param non_diag value to emit outside the diagonal - */ - matrix_generator_iterator( - const linear_system_t &system, + HPCGBuilder( + const std::array< CoordType, DIMS > &sizes, + CoordType _halo, ValueType diag, ValueType non_diag - ) noexcept : - _val( diag, non_diag, 0, 0 ), - _lin_system( &system ), - _sys_iter( system.begin() ) - { - update_coords(); - } - - void update_coords() { - _val._i = _sys_iter->get_element_linear(); - _val._j = _sys_iter->get_neighbor_linear(); - } - }; - - - template< - size_t DIMS, - typename CoordT, - typename T - > - class hpcg_builder { - - using system_t = grb::utils::geometry::linearized_halo_ndim_system< CoordT, DIMS >; - - system_t system; - // const grb::utils::geometry::linearized_halo_ndim_system< CoordT, DIMS > system; - const CoordT halo; - - public: - - using hpcg_sys_iterator = matrix_generator_iterator< DIMS, CoordT, T >; - - hpcg_builder( - const std::array< CoordT, DIMS > &sizes, - CoordT _halo ) : + halo( _halo ), system( sizes, _halo ), - halo( _halo ) + _diag_generator( diag, non_diag ) { if( _halo <= 0 ) { throw 
std::invalid_argument( "halo should be higher than 0" ); @@ -286,13 +104,14 @@ namespace grb { } } - hpcg_builder( const hpcg_builder< DIMS, CoordT, T> & ) = delete; - hpcg_builder( hpcg_builder< DIMS, CoordT, T> && ) = delete; + HPCGBuilder( const HPCGBuilder< DIMS, CoordType, ValueType > & ) = default; + + HPCGBuilder( HPCGBuilder< DIMS, CoordType, ValueType > && ) = default; - hpcg_builder< DIMS, CoordT, T> & operator=( const hpcg_builder< DIMS, CoordT, T> & ) = delete; + HPCGBuilder< DIMS, CoordType, ValueType > & operator=( const HPCGBuilder< DIMS, CoordType, ValueType > & ) = default; - hpcg_builder< DIMS, CoordT, T> & operator=( hpcg_builder< DIMS, CoordT, T> && ) = delete; + HPCGBuilder< DIMS, CoordType, ValueType > & operator=( HPCGBuilder< DIMS, CoordType, ValueType > && ) = default; size_t system_size() const { return system.base_system_size(); @@ -302,27 +121,34 @@ namespace grb { return system.halo_system_size(); } - const system_t & get_generator() const { + const HaloSystemType & get_generator() const { return system; } - hpcg_sys_iterator make_begin_iterator( - T diag, - T non_diag - ) const { - return hpcg_sys_iterator( system, diag, non_diag ); + Iterator make_begin_iterator() const { + return Iterator( system, _diag_generator ); } - hpcg_sys_iterator make_end_iterator( - T diag, - T non_diag - ) const { - hpcg_sys_iterator result( system, diag, non_diag ); + Iterator make_end_iterator() const { + Iterator result( system, _diag_generator ); result += num_neighbors() - 1; // do not trigger boundary checks ++result; return result; } + ValueType get_diag_value() const { + return _diag_generator._diag; + } + + ValueType get_non_diag_value() const { + return _diag_generator._non_diag; + } + + + private: + const CoordType halo; + HaloSystemType system; + HPCGDiagGenerator _diag_generator; }; @@ -356,7 +182,7 @@ namespace grb { template< size_t DIMS, - typename CoordT, + typename CoordType, typename T > class hpcg_coarsener_builder; @@ -377,26 +203,26 @@ 
namespace grb { */ template< size_t DIMS, - typename CoordT, + typename CoordType, typename T > struct coarsener_generator_iterator { - friend hpcg_coarsener_builder< DIMS, CoordT, T >; + friend hpcg_coarsener_builder< DIMS, CoordType, T >; - using RowIndexType = CoordT; ///< numeric type of rows - using ColumnIndexType = CoordT; + using RowIndexType = CoordType; ///< numeric type of rows + using ColumnIndexType = CoordType; using ValueType = T; - using lin_system_t = grb::utils::geometry::linearized_ndim_system< CoordT, - grb::utils::geometry::array_vector_storage< CoordT, DIMS > >; - using __iter_t = typename lin_system_t::iterator; - using self_t = coarsener_generator_iterator< DIMS, CoordT, T >; - using array_t = std::array< CoordT, DIMS >; + using lin_system_t = grb::utils::geometry::LinearizedNDimSystem< CoordType, + grb::utils::geometry::ArrayVectorStorage< CoordType, DIMS > >; + using __iter_t = typename lin_system_t::Iterator; + using SelfType = coarsener_generator_iterator< DIMS, CoordType, T >; + using array_t = std::array< CoordType, DIMS >; struct __value { - friend self_t; + friend SelfType; __value( RowIndexType i, @@ -428,13 +254,13 @@ namespace grb { using reference = const value_type&; using difference_type = typename __iter_t::difference_type; - coarsener_generator_iterator( const self_t & o ) = default; + coarsener_generator_iterator( const SelfType & o ) = default; - coarsener_generator_iterator( self_t && o ) = default; + coarsener_generator_iterator( SelfType && o ) = default; - self_t & operator=( const self_t & ) = default; + SelfType & operator=( const SelfType & ) = default; - self_t & operator=( self_t && ) = default; + SelfType & operator=( SelfType && ) = default; /** * @brief Increments the row and the column according to the respective physical sizes, @@ -442,33 +268,33 @@ namespace grb { * * @return \code *this \endcode, i.e. 
the same object with the updates row and column */ - self_t & operator++() noexcept { + SelfType & operator++() noexcept { (void) ++_sys_iter; update_coords(); return *this; } - self_t & operator+=( size_t offset ) { + SelfType & operator+=( size_t offset ) { _sys_iter += offset; update_coords(); return *this; } - difference_type operator-( const self_t &o ) const { + difference_type operator-( const SelfType &o ) const { return this->_sys_iter - o._sys_iter; } /** * @brief Returns whether \c this and \p o differ. */ - bool operator!=( const self_t &o ) const { + bool operator!=( const SelfType &o ) const { return this->_sys_iter != o._sys_iter; } /** * @brief Returns whether \c this and \p o are equal. */ - bool operator==( const self_t &o ) const { + bool operator==( const SelfType &o ) const { return ! this->operator!=( o ); } @@ -566,14 +392,14 @@ namespace grb { template< size_t DIMS, - typename CoordT, + typename CoordType, typename T > class hpcg_coarsener_builder { public: - using array_t = std::array< CoordT, DIMS >; - using hpcg_coarsener_iterator = coarsener_generator_iterator< DIMS, CoordT, T >; + using array_t = std::array< CoordType, DIMS >; + using hpcg_coarsener_iterator = coarsener_generator_iterator< DIMS, CoordType, T >; hpcg_coarsener_builder( const array_t &_coarser_sizes, @@ -592,13 +418,13 @@ namespace grb { } } - hpcg_coarsener_builder( const hpcg_coarsener_builder< DIMS, CoordT, T> & ) = delete; + hpcg_coarsener_builder( const hpcg_coarsener_builder< DIMS, CoordType, T> & ) = delete; - hpcg_coarsener_builder( hpcg_coarsener_builder< DIMS, CoordT, T> && ) = delete; + hpcg_coarsener_builder( hpcg_coarsener_builder< DIMS, CoordType, T> && ) = delete; - hpcg_coarsener_builder< DIMS, CoordT, T> & operator=( const hpcg_coarsener_builder< DIMS, CoordT, T> & ) = delete; + hpcg_coarsener_builder< DIMS, CoordType, T> & operator=( const hpcg_coarsener_builder< DIMS, CoordType, T> & ) = delete; - hpcg_coarsener_builder< DIMS, CoordT, T> & operator=( 
hpcg_coarsener_builder< DIMS, CoordT, T> && ) = delete; + hpcg_coarsener_builder< DIMS, CoordType, T> & operator=( hpcg_coarsener_builder< DIMS, CoordType, T> && ) = delete; size_t system_size() const { return system.system_size(); @@ -616,8 +442,8 @@ namespace grb { } private: - const grb::utils::geometry::linearized_ndim_system< CoordT, - grb::utils::geometry::array_vector_storage< CoordT, DIMS > > system; + const grb::utils::geometry::LinearizedNDimSystem< CoordType, + grb::utils::geometry::ArrayVectorStorage< CoordType, DIMS > > system; array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be //// incremented when incrementing the row coordinates; is is the ration between diff --git a/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp b/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp deleted file mode 100644 index 9bb5c7a95..000000000 --- a/include/graphblas/algorithms/hpcg/old_matrix_building_utils.hpp +++ /dev/null @@ -1,173 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file hpcg_matrix_building_utils.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build the matrices for HPCG simulations in an arbitrary number of dimensions. 
- * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_OLD_MATRIX_BUILDING_UTILS -#define _H_GRB_ALGORITHMS_OLD_MATRIX_BUILDING_UTILS - -#include -#include -#include -#include -#include -#include - -#include - -#include "old_ndim_matrix_builders.hpp" - - -namespace grb { - namespace algorithms { - namespace old { - - - /** - * @brief Builds a \p DIMS -dimensional system matrix for HPCG simulation. - * - * This routine initializes \p M to a matrix representing a \p DIMS -dimensions system of sizes - * \p sys_sizes, with an iteration halo of size \p halo_size . The matrix diagonal values are initialized - * to \p diag_value while the other non-zero values are initialized to \p non_diag_value . - * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed - * @param sys_sizes the sizes of the physical system - * @param halo_size the size of the halo of point to iterate in - * @param diag_value diagonal value - * @param non_diag_value value outside of the diagonal - * @return grb::RC the success value returned when trying to build the matrix - */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_system_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & sys_sizes, std::size_t halo_size, T diag_value, T non_diag_value ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - if( grb::nrows( M ) != n || grb::nrows( M ) != grb::ncols( M ) ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be square" - " and in accordance with given system " - "sizes" ); - } - grb::algorithms::matrix_generator_iterator< DIMS, T > begin( sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); - grb::algorithms::matrix_generator_iterator< DIMS, T > end( sys_sizes, n, 
halo_size, diag_value, non_diag_value ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); - } - - /** - * @brief Builds a coarsener matrix for an HPCG simulation. - * - * It initializes \p M as a rectangular matrix, with rows corresponding to the coarser system - * (of dimensions \p coarser_sizes - output) and columns corresponding to the finer system - * (of dimensions \p finer_sizes - input). The resulting coarsening matrix takes in input the finer system - * and coarsens it by keeping one element every \a S , where \a S is the ratio between the finer and - * the coarser dimension (computed for each dimension). In this way each \p DIMS -dimensional finer element - * corresponds to its bounding coarser element. - * - * For the coarsening to be feasible, the sizes of the finer system \b must be a multiple of those of the - * coarser system. If this condition is not met, an exception is thrown. - * - * @tparam DIMS system dimensions - * @tparam T type of matrix values - * @tparam B matrix GraphBLAS backend - * @param M the matrix to be initialized; it must be already constructed with proper dimensions - * @param coarser_sizes sizes of the coarser system - * @param finer_sizes sizes of the finer system; each one \b must be a multiple of the corresponding value - * in \p coarser_size , otherwise an exception is thrown - * @return grb::RC the success value returned when trying to build the matrix - */ - template< std::size_t DIMS, typename T, enum grb::Backend B > - grb::RC build_ndims_coarsener_matrix( grb::Matrix< T, B > & M, const std::array< std::size_t, DIMS > & coarser_sizes, const std::array< std::size_t, DIMS > & finer_sizes ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - std::size_t const rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - for( std::size_t i { 0 }; i < coarser_sizes.size(); i++ ) { - std::size_t step = finer_sizes[ i ] / coarser_sizes[ i ]; - if( step * 
coarser_sizes[ i ] != finer_sizes[ i ] ) { - throw std::invalid_argument( "finer sizes should be a multiple of " - "coarser sizes" ); - } - } - std::size_t const cols { std::accumulate( finer_sizes.cbegin(), finer_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { - throw std::invalid_argument( "wrong matrix dimensions: matrix should " - "be rectangular" - " with rows == " - "and cols == " ); - } - - grb::algorithms::coarsener_generator_iterator< DIMS, T > begin( coarser_sizes, finer_sizes, 0 ); - grb::algorithms::coarsener_generator_iterator< DIMS, T > end( coarser_sizes, finer_sizes, rows ); - return buildMatrixUnique( M, begin, end, grb::IOMode::SEQUENTIAL ); - } - - /** - * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . - * - * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array - * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, - * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, - * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. - * - * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated - * only with the \c true values, leading to sparse vectors. This saves on storage space and allows - * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. 
- * - * @tparam B GraphBLAS backend for the vector - * @param masks output vector of color masks - * @param matrix_size size of the system matrix - * @param colors numbers of colors masks to build; it must be < \p matrix_size - * @return grb::RC the success value returned when trying to build the vector - */ - template< enum grb::Backend B > - grb::RC build_static_color_masks( std::vector< grb::Vector< bool, B > > & masks, std::size_t matrix_size, std::size_t colors ) { - if( ! masks.empty() ) { - throw std::invalid_argument( "vector of masks is expected to be " - "empty" ); - } - if( matrix_size < colors ) { - throw std::invalid_argument( "syztem size is < number of colors: too " - "small" ); - } - grb::RC rc { grb::SUCCESS }; - masks.reserve( colors ); - for( std::size_t i { 0U }; i < colors; i++ ) { - // build in-place, assuming the compiler deduces the right constructor according to B - masks.emplace_back( matrix_size ); - grb::Vector< bool > & mask = masks.back(); - // grb::set(mask, false); // DO NOT initialize false's explicitly, otherwise - // RBGS will touch them too and the runtime will increase! - for( std::size_t j = i; j < matrix_size; j += colors ) { - rc = grb::setElement( mask, true, j ); - assert( rc == grb::SUCCESS ); - if( rc != grb::SUCCESS ) - return rc; - } - } - return rc; - } - - } //namespace old - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MATRIX_BUILDING_UTILS diff --git a/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp deleted file mode 100644 index 9f64e9884..000000000 --- a/include/graphblas/algorithms/hpcg/old_ndim_matrix_builders.hpp +++ /dev/null @@ -1,562 +0,0 @@ - -/* - * Copyright 2021 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file ndim_matrix_builders.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions - * - * In particular, the main matrices are: - * - a system matrix, generated from an N-dimenional space of coordinates by iterating along - * each dimension in priority order, where the first dimension has highest priority and the last - * dimension least priority; for each point (row), all its N-dimensional neighbours within - * a given distance are generated for the column - * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting - * each point to a corresponding system of finer sizes - * - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_OLD_NDIM_MATRIX_BUILDERS -#define _H_GRB_ALGORITHMS_OLD_NDIM_MATRIX_BUILDERS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace grb { - namespace algorithms { - namespace old { - - /** - * @brief Base class that iterates on DIMS dimensions starting from the first one. - * - * The coordinates are assumed to generate the row number in a matrix whose number of rows is - * the product of all sizes. This class generates row numbers for physical problems described as - * systems of linear equations in an n-dimensional space. 
- * - * Example of iterations in a 3D (x, y, z) system of size (4,3,2), with generated row numbers - * reported as '=> ROW': - * - z[0] - * - y[0] - * - x[0] => 0, x[1] => 1, x[2] => 2, x[3] => 3 - * - y[1] - * - x[0] => 4, x[1] => 5, x[2] => 6, x[3] => 7 - * - y[2] - * - x[0] => 8, x[1] => 9, x[2] => 10, x[3] => 11 - * - z[1] - * - y[0] - * - x[0] => 12, x[1] => 13, x[2] => 14, x[3] => 15 - * - y[1] - * - x[0] => 16, x[1] => 17, x[2] => 18, x[3] => 19 - * - y[2] - * - x[0] => 20, x[1] => 21, x[2] => 22, x[3] => 23 - * - * The main goal of this class is to be derived by other classes to generate matrices in an - * STL-iterator-fashion; hence, this class contains all the code for basic coordinate-to-row-column - * conversion in \p DIM dimensions and the basic logic to increment the row number. - * - * @tparam DIMS number os dimensions of the system - */ - template< std::size_t DIMS > - struct row_generator { - - using row_coordinate_type = std::size_t; ///< numeric type of rows - using array_t = std::array< row_coordinate_type, - DIMS >; ///< type for the array storing the coordinates. - - const array_t physical_sizes; ///< size of each dimension, starting from the one to be explored first - - /** - * @brief Construct a new row generator object - * @param[in] _sizes array of sizes of each dimension; no dimension should be 0, otherwise an exception - * is thrown - * @param[in] first_row first row to iterate from; it is allowed to be beyond the matrix size, e.g. 
to create - * an end iterator (no check occurs) - */ - row_generator( const array_t & _sizes, row_coordinate_type first_row ) : physical_sizes( _sizes ) { - static_assert( DIMS > 0, "DIMS should be higher than 0" ); - for( const auto i : _sizes ) { - if( i == static_cast< row_coordinate_type >( 0U ) ) { - throw std::invalid_argument( "All dimension sizes must " - "be > 0" ); - } - } - row_to_coords( first_row ); - } - - row_generator( const row_generator & o ) = default; - - row_generator( row_generator && o ) = default; - - protected: - // x: row_coords[0], y: row_coords[1], z: row_coords[2], ... - array_t row_coords; ///< n-D coordinates from which to compute the row - - /** - * @brief converts a row number into a n-D coordinates according to the sizes in #physical_sizes - * - * In case the input is higher than the nunber of rows, the last coordinate is allowed to - * go beyond its physical size. E.g., if the system has size (4,3,2) and \p rowcol is 24, - * the coordinates are (0,0,3). - * - * @param[in] rowcol row number to convert; it can be any number - */ - void row_to_coords( row_coordinate_type rowcol ) { - std::size_t s = 1; - for( std::size_t i { 0 }; i < row_coords.size() - 1; i++ ) - s *= physical_sizes[ i ]; - - for( typename array_t::size_type i { row_coords.size() - 1 }; i > 0; i-- ) { - row_coords[ i ] = rowcol / s; - rowcol -= row_coords[ i ] * s; - s /= physical_sizes[ i ]; - } - row_coords[ 0 ] = rowcol % physical_sizes[ 0 ]; - } - - /** - * @brief Pure function converting an array of coordinates into a row number, based on #physical_sizes. 
- * @param a the #array_t array of coordinates to convert - * @return #row_coordinate_type the row corresponding to the coordinates in \p a - */ - row_coordinate_type coords_to_rowcol( const array_t & a ) const { - row_coordinate_type row { 0 }; - row_coordinate_type s { 1 }; - for( typename array_t::size_type i { 0 }; i < a.size(); i++ ) { - row += s * a[ i ]; - s *= physical_sizes[ i ]; - } - return row; - } - - /** - * @brief Increment #row_coords in order to move to the next coordinate (according to the - * n-dimensional iteration order) and update #current_row accordingly. - * - * To be used by derived classes in order to generate the matrix, e.g. via the \c operator()++ - * operator prescribed for STL-like iterators. - */ - void increment_row() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & coord = row_coords[ i ]; - // must rewind dimension if we wrap-around - typename array_t::value_type new_coord = ( coord + 1 ) % physical_sizes[ i ]; - rewind = new_coord < coord; - coord = new_coord; - ++i; - } while( rewind && i < row_coords.size() - 1 ); // rewind only the first N-1 coordinates - - // if we still have to rewind, increment the last coordinate, which is unbounded - if( rewind ) { - row_coords.back()++; - } - } - }; - - // =============================================================== - - /** - * @brief STL-like iterable class to generate the values for a matrix by iterating in an n-dimensional - * space along the coordinates. - * - * For each \f$ X=(x0, x1, ...,xn) \f$ point of the underlying (n+1)-dimensional space, - * this class iterates through the points of the n-dimensional halo of radius \p halo around \f$ X \f$, - * generating the row number corresponding to \f$ X \f$ and the column number corresponding to - * each halo point. At each coordinate \code (row, col) \endcode generated this way, the corresponding matrix value - * being generated depends on whether \code row == col \endcode. 
- * - * @tparam DIMS number of dimensions of the system - * @tparam HALO halo size, determining the number of points to iterate around and thus the column coordinates - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T = double > - struct matrix_generator_iterator : public row_generator< DIMS > { - - using row_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; - using column_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; - using nonzero_value_type = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T >; - - using RowIndexType = typename row_generator< DIMS >::row_coordinate_type; - using ColumnIndexType = typename row_generator< DIMS >::row_coordinate_type; - using iterator_category = std::forward_iterator_tag; - using pointer = const value_type; - using reference = const value_type&; - using difference_type = long; - - // halo may in future become a DIM-size array to iterate in arbitrary shapes - const row_coordinate_type halo; ///< number of points per dimension to iterate around - const nonzero_value_type diagonal_value; ///< value to be emitted when the object has moved to the diagonal - const nonzero_value_type non_diagonal_value; ///< value to emit outside of the diagonal - - /** - * @brief Construct a new \c matrix_generator_iterator object, setting the current row as \p row - * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. 
- * - * @param sizes array with the sizes along the dimensions - * @param row current row to initialize the matrix on - * @param _halo halo of points to iterate around; must be > 0 - * @param diag value to emit when on the diagonal - * @param non_diag value to emit outside the diagonal - */ - matrix_generator_iterator( const array_t & sizes, row_coordinate_type row, row_coordinate_type _halo, nonzero_value_type diag, nonzero_value_type non_diag ) : - row_generator< DIMS >( sizes, row ), halo( _halo ), diagonal_value( diag ), non_diagonal_value( non_diag ) { - if( halo <= 0 ) { - throw std::invalid_argument( "halo should be higher than " - "0" ); - } - for( const auto i : sizes ) { - if( i < static_cast< row_coordinate_type >( 2 * halo + 1 ) ) { - throw std::invalid_argument( "Iteration halo goes " - "beyond system sizes" ); - } - } - current_values.first.first = row; - update_column_max_values(); - reset_all_columns(); - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = v(); - } - - matrix_generator_iterator( const matrix_generator_iterator & o ) = default; - - matrix_generator_iterator( matrix_generator_iterator && o ) = default; - - /** - * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. - * - * This operator internally increments the columns coordinates until wrap-around, when it increments - * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate - * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo. 
- * - * @return matrix_generator_iterator& \c this object, with the updated state - */ - matrix_generator_iterator< DIMS, T > & operator++() { - bool must_rewind = increment_column(); - if( must_rewind ) { - this->increment_row(); - // after changing row, we must find the first non-zero column - reset_all_columns(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - update_column_max_values(); - } - // trigger column update after row update, as a row update - // triggers a column update - current_values.first.second = this->coords_to_rowcol( col_coords ); - current_values.second = this->v(); - return *this; - } - - /** - * @brief Operator to compare \c this against \p o and return whether they differ. - * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator!=( const matrix_generator_iterator< DIMS, T > & o ) const { - if( o.i() != this->i() ) { - return true; - } - return o.j() != this->j(); - } - - /** - * @brief Operator to compare \c this against \p o and return whether they are equal. - * - * @param o object to compare \c this against - * @return true of the row or the column is different between \p o and \c this - * @return false if both row and column of \p o and \c this are equal - */ - bool operator==( const matrix_generator_iterator< DIMS, T > & o ) const { - return o.i() == this->i() && o.j() == this->j(); - } - - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ - const value_type & operator*() const { - return current_values; - } - - /** - * @brief Returns current row. - */ - inline row_coordinate_type i() const { - return current_values.first.first; - } - - /** - * @brief Returns current column. 
- */ - inline column_coordinate_type j() const { - return current_values.first.second; - } - - /** - * @brief Returns the current matrix value. - * - * @return nonzero_value_type #diagonal_value if \code row == column \endcode (i.e. if \code this-> \endcode - * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise - */ - inline nonzero_value_type v() const { - return j() == i() ? diagonal_value : non_diagonal_value; - } - - private: - // offsets w.r.t. rows - array_t col_coords; ///< coordinates corresponding to current column - array_t column_max_values; ///< maximum values for the column coordinates, to stop column increment - //// and reset the column coordinates - value_type current_values; ///< triple storing the current value for row, column and matrix element - - /** - * @brief Updates the maximum values each column coordinate can reach, according to the row coordinates. - * - * To be called after each row coordinates update. - */ - void update_column_max_values() { - for( std::size_t i { 0 }; i < column_max_values.size(); i++ ) { - column_max_values[ i ] = std::min( this->physical_sizes[ i ] - 1, this->row_coords[ i ] + halo ); - } - } - - /** - * @brief Resets the value of column dimension \p dim to the first possible value. - * - * The final value of #col_coords[dim] depends on the current row (#row_coords) and on the \p halo - * and is \f$ max(0, \f$ #row_coords \f$[dim])\f$. - * - * @param dim the dimension to reset - */ - void reset_column_coords( std::size_t dim ) { - // cannot use std::max because row_coords is unsigned and can wrap-around - col_coords[ dim ] = this->row_coords[ dim ] <= halo ? 0 : ( this->row_coords[ dim ] - halo ); - } - - /** - * @brief resets all values in #col_coords to the initial coordinates, - * iterating from on the current row. 
- */ - void reset_all_columns() { - for( std::size_t i { 0 }; i < col_coords.size(); i++ ) { - reset_column_coords( i ); - } - } - - /** - * @brief Increment the column according to the iteration order, thus resetting the column coordinates - * when the last possible column value for the current row has been reached. - * - * @return true if the column coordinates have been reset, and thus also the row must be incremented - * @return false if the column coordinates - */ - bool increment_column() { - bool rewind; - typename array_t::size_type i { 0 }; - do { - typename array_t::value_type & col = col_coords[ i ]; - // must rewind dimension if the column offset is already at the max value - // or if the column coordinates are already at the max value - rewind = ( col == column_max_values[ i ] ); - if( rewind ) { - // col = this->row_coords[i] == 0 ? 0 : this->row_coords[i] - (halo); - reset_column_coords( i ); - } else { - ++col; - } - ++i; - } while( rewind && i < col_coords.size() ); - - // if we change z, then we also must reset x and y; if only y, we must reset x, and so on - return rewind; - } - }; - - // =============================================================== - - /** - * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. - * - * This class coarsens a finer system to a coarser system by projecting each input value (column), - * espressed in finer coordinates, to an output (row) value espressed in coarser coordinates. - * The coarser sizes are assumed to be row_generator#physical_sizes, while the finer sizes are here - * stored inside #finer_sizes. - * - * The corresponding refinement matrix is obtained by transposing the coarsening matrix. 
- * - * @tparam DIMS number of dimensions of the system - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T = double > - struct coarsener_generator_iterator : public row_generator< DIMS > { - - using row_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; - using column_coordinate_type = typename row_generator< DIMS >::row_coordinate_type; - using nonzero_value_type = T; - using array_t = typename row_generator< DIMS >::array_t; - using value_type = std::pair< std::pair< row_coordinate_type, column_coordinate_type >, T >; - - using RowIndexType = typename row_generator< DIMS >::row_coordinate_type; - using ColumnIndexType = typename row_generator< DIMS >::row_coordinate_type; - using iterator_category = std::forward_iterator_tag; - using pointer = const value_type; - using reference = const value_type&; - using difference_type = long; - - // the sizes to project from - const array_t finer_sizes; ///< the size of the finer system (columns) - array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes - - /** - * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, - * setting its row at \p _current_row and the column at the corresponding value. - * - * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the - * construction will throw an exception. 
- * - * @param _coarser_sizes sizes of the coarser system (rows) - * @param _finer_sizes sizes of the finer system (columns) - * @param _current_row row (in the coarser system) to set the iterator on - */ - coarsener_generator_iterator( const array_t & _coarser_sizes, const array_t & _finer_sizes, row_coordinate_type _current_row ) : - row_generator< DIMS >( _coarser_sizes, _current_row ), finer_sizes( _finer_sizes ), steps( { 0 } ) { - for( std::size_t i { 0 }; i < DIMS; i++ ) { - // finer size MUST be an exact multiple of coarser_size - typename array_t::value_type step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; - if( step == 0 || finer_sizes[ i ] / step != this->physical_sizes[ i ] ) { - throw std::invalid_argument( std::string( "finer size " - "of " - "dimension" - " " ) + - std::to_string( i ) + - std::string( "is not an exact multiple of coarser " - "size" ) ); - } - steps[ i ] = step; - } - current_values.first.first = _current_row; - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); - } - - coarsener_generator_iterator( const coarsener_generator_iterator & o ) = default; - - coarsener_generator_iterator( coarsener_generator_iterator && o ) = default; - - /** - * @brief Increments the row and the column according to the respective physical sizes, - * thus iterating onto the coarsening matrix coordinates. - * - * @return \code *this \endcode, i.e. the same object with the updates row and column - */ - coarsener_generator_iterator< DIMS, T > & operator++() { - this->increment_row(); - current_values.first.first = this->coords_to_rowcol( this->row_coords ); - current_values.first.second = coords_to_finer_col(); - current_values.second = v(); - return *this; - } - - /** - * @brief Returns whether \c this and \p o differ. 
- */ - bool operator!=( const coarsener_generator_iterator< DIMS, T > & o ) const { - if( this->i() != o.i() ) { - return true; - } - return this->j() != o.j(); - } - - /** - * @brief Returns whether \c this and \p o are equal. - */ - bool operator==( const coarsener_generator_iterator< DIMS, T > & o ) const { - return this->i() == o.i() && this->j() == o.j(); - } - - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ - const value_type & operator*() const { - return current_values; - } - - /** - * @brief Returns the current row, according to the coarser system. - */ - inline row_coordinate_type i() const { - return current_values.first.first; - } - - /** - * @brief Returns the current column, according to the finer system. - */ - inline column_coordinate_type j() const { - return current_values.first.second; - } - - /** - * @brief Returns always 1, as the coarsening keeps the same value. - */ - inline nonzero_value_type v() const { - return static_cast< nonzero_value_type >( 1 ); - } - - private: - value_type current_values; ///< triple storing the current value for row, column and matrix element - - /** - * @brief Returns the row coordinates converted to the finer system, to compute - * the column value. 
- */ - column_coordinate_type coords_to_finer_col() const { - column_coordinate_type row { 0 }; - column_coordinate_type s { 1 }; - for( typename array_t::size_type i { 0 }; i < this->row_coords.size(); i++ ) { - s *= steps[ i ]; - row += s * this->row_coords[ i ]; - s *= this->physical_sizes[ i ]; - } - return row; - } - }; - - } // namespace old - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 77bef1995..7a8db963d 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -22,62 +22,28 @@ * @date 2021-04-30 */ -#ifndef _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS -#define _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS +#ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS +#define _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS #include #include #include #include #include +#include +#include +#include #include -#include +#include -#include "hpcg_data.hpp" -#include "matrix_building_utils.hpp" +#include "ndim_matrix_builders.hpp" #include "coloring.hpp" -#ifndef MASTER_PRINT -#define INTERNAL_MASTER_PRINT -#define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } -#endif - - namespace grb { namespace algorithms { - /** - * @brief Divide each value of \p source by \p step and store the result into \p destination. - * - * @tparam DIMS size of passed arrays - */ - template< std::size_t DIMS > - void divide_array( std::array< std::size_t, DIMS > & destination, const std::array< std::size_t, DIMS > & source, std::size_t step ) { - for( std::size_t i { 0 }; i < destination.size(); i++ ) { - destination[ i ] = source[ i ] / step; - } - } - - /** - * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and - * coarsening information. 
- * - * @tparam DIMS dimensions of the physical system - * @tparam T type of matrix values - */ - template< std::size_t DIMS, typename T > - struct hpcg_system_params { - std::array< std::size_t, DIMS > physical_sys_sizes; - std::size_t halo_size; - T diag_value; - T non_diag_value; - std::size_t min_phys_size; - std::size_t max_levels; - std::size_t coarsening_step; - }; - template< typename CoordType > void split_rows_by_color( const std::vector< CoordType > & row_colors, size_t num_colors, @@ -89,190 +55,293 @@ namespace grb { } } - // SystemData must have a zero_temp_vectors() - template< std::size_t DIMS, typename IOType, typename NonzeroType, typename SystemData > - grb::RC build_base_system( - typename std::enable_if< - std::is_base_of< system_data< IOType, NonzeroType >, SystemData >::value, - SystemData& >::type system, - size_t system_size, - const std::array< std::size_t, DIMS > & physical_sys_sizes, - size_t halo_size, - NonzeroType diag_value, - NonzeroType non_diag_value, - std::array< double, 4 > & times + template < + size_t DIMS, + typename coord_t, + typename NonzeroType, + enum grb::Backend B + > grb::RC populate_system_matrix( + const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &system_generator, + grb::Matrix< NonzeroType, B > &M ) { - - grb::RC rc { grb::SUCCESS }; const size_t pid { spmd<>::pid() }; - grb::utils::Timer timer; - static const char * const log_prefix = " -- "; - using coord_t = size_t; - static_assert( DIMS > 0, "DIMS must be > 0" ); - size_t n { std::accumulate( physical_sys_sizes.cbegin(), physical_sys_sizes.cend(), - 1UL, std::multiplies< size_t >() ) }; - if( n > std::numeric_limits< coord_t >::max() ) { - throw std::domain_error( "CoordT cannot store the matrix coordinates" ); + if( pid == 0) { + std::cout << "- generating system matrix..."; } - std::array< coord_t, DIMS > sys_sizes; - for( size_t i = 0; i < DIMS; i++ ) sys_sizes[i] = physical_sys_sizes[i]; - grb::algorithms::hpcg_builder< DIMS, coord_t, 
NonzeroType > system_generator( sys_sizes, halo_size ); - - MASTER_PRINT( pid, log_prefix << "generating system matrix..." ); - timer.reset(); - rc = build_ndims_system_matrix< DIMS, coord_t, NonzeroType >( - system.A, - system_generator, - diag_value, non_diag_value + typename grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >::Iterator begin( + system_generator.make_begin_iterator() ); + typename grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >::Iterator end( + system_generator.make_end_iterator() ); - if( rc != grb::SUCCESS ) { - return rc; + grb::utils::partition_iteration_range_on_procs( system_generator.num_neighbors(), begin, end ); + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); + } + + + template< + typename coord_t, + size_t DIMS, + typename IOType, + typename NonzeroType + > grb::RC populate_coarsener( + const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &finer_system_generator, + const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &coarser_system_generator, + coarsening_data< IOType, NonzeroType > &coarsener + ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); + + const std::array< coord_t, DIMS > &finer_sizes = finer_system_generator.get_generator().get_sizes(); + const std::array< coord_t, DIMS > &coarser_sizes = coarser_system_generator.get_generator().get_sizes(); + const size_t finer_size = finer_system_generator.system_size(); + const size_t coarser_size = coarser_system_generator.system_size(); + + if( coarser_size >= finer_size ) { + throw std::invalid_argument( "wrong sizes"); } - times[ 0 ] = timer.time(); - MASTER_PRINT( pid, " time (ms) " << times[ 0 ] << std::endl ); - // set values of vectors - MASTER_PRINT( pid, log_prefix << "populating vectors..." 
); - timer.reset(); - rc = set( system.A_diagonal, diag_value ); - if( rc != grb::SUCCESS ) { - return rc; + size_t const rows { coarser_size }; + size_t const cols { finer_size }; + + assert( finer_sizes.size() == coarser_sizes.size() ); + + for( size_t i { 0 }; i < coarser_sizes.size(); i++ ) { + std::ldiv_t ratio = std::ldiv( finer_sizes[ i ], coarser_sizes[ i ] ); + if( ratio.quot < 2 || ratio.rem != 0 ) { + throw std::invalid_argument( "finer sizes should be a multiple of coarser sizes" ); + } + } + grb::Matrix< NonzeroType > &M = coarsener.coarsening_matrix; + if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { + throw std::invalid_argument( "wrong matrix dimensions: matrix should be rectangular" + " with rows == and cols == " ); + } + + grb::algorithms::hpcg_coarsener_builder< DIMS, coord_t, NonzeroType > coarsener_builder( coarser_sizes, finer_sizes ); + grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, NonzeroType > begin( coarsener_builder.make_begin_iterator() ); + grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, NonzeroType > end( coarsener_builder.make_end_iterator() ); + grb::utils::partition_iteration_range_on_procs( coarsener_builder.system_size(), begin, end ); + return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); + } + + namespace internal { + + template< typename CoordType > struct true_iter { + + static const bool __TRUE = true; + + using self_t = true_iter< CoordType >; + using iterator_category = std::random_access_iterator_tag; + using value_type = bool; + using pointer = const bool *; + using reference = const bool&; + using difference_type = long; + + true_iter() = delete; + + true_iter( CoordType first ): index( first ) {} + + true_iter( const self_t & ) = default; + + self_t & operator=( const self_t & ) = default; + + bool operator!=( const self_t & other ) const { + return this->index != other.index; + } + + self_t & operator++() noexcept { + (void) index++; + return *this; + } + + self_t & 
operator+=( size_t increment ) noexcept { + index += increment; + return *this; + } + + difference_type operator-( const self_t & other ) noexcept { + return static_cast< difference_type >( this->index - other.index ); + } + + pointer operator->() const { + return &__TRUE; + } + + reference operator*() const { + return *(this->operator->()); + } + + private: + CoordType index; + }; + + template< typename CoordType > const bool true_iter< CoordType >::__TRUE; + + /** + * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . + * + * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array + * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, + * for color 1 in the positions \f$ [1, 1+colors, 1+2*colors, ..., floor((system_size - 2)/colors) * color] \f$, + * etc.; the mask for color 0 is in \c masks[0], for color 1 in \c masks[1] and so on. + * + * The vectors stored in \p masks (assumed empty at the beginning) are built inside the function and populated + * only with the \c true values, leading to sparse vectors. This saves on storage space and allows + * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. + * + * @tparam B GraphBLAS backend for the vector + * @param masks output vector of color masks + * @param matrix_size size of the system matrix + * @param colors numbers of colors masks to build; it must be < \p matrix_size + * @return grb::RC the success value returned when trying to build the vector + */ + template< enum grb::Backend B > + grb::RC build_static_color_masks( + size_t matrix_size, + const std::vector< std::vector< size_t > > &per_color_rows, + std::vector< grb::Vector< bool, B > > & masks + ) { + if( ! 
masks.empty() ) { + throw std::invalid_argument( "vector of masks is expected to be empty" ); + } + for( size_t i = 0; i < per_color_rows.size(); i++ ) { + const std::vector< size_t > & rows = per_color_rows[ i ]; + /* + { + std::cout << "\ncolor " << i << std::endl; + for( size_t row : rows ) { + std::cout << row << " "; + } + std::cout << std::endl; + } + */ + masks.emplace_back( matrix_size ); + grb::Vector< bool > & output_mask = masks.back(); + std::vector< size_t >::const_iterator begin = rows.cbegin(); + std::vector< size_t >::const_iterator end = rows.cend(); + // partition_iteration_range( rows.size(), begin, end ); + grb::RC rc = grb::buildVectorUnique( output_mask, begin , end, true_iter< size_t >( 0 ), + true_iter< size_t >( std::distance( begin, end ) ), IOMode::SEQUENTIAL ); + if( rc != SUCCESS ) { + std::cerr << "error while creating output mask for color " << i << ": " + << toString( rc ) << std::endl; + return rc; + } + /* + { + std::cout << "mask color " << i << std::endl; + size_t count = 0; + for( const auto & v : output_mask ) { + std::cout << v.first << " "; + count++; + if( count > 20 ) break; + } + std::cout << std::endl; + } + */ + } + return grb::SUCCESS; } - rc = system.zero_temp_vectors(); + + } // namespace internal + + template< + typename coord_t, + size_t DIMS, + typename T + > grb::RC populate_smoothing_data( + const grb::algorithms::HPCGBuilder< DIMS, coord_t, T > &system_generator, + smoother_data< T > &smoothing_info + ) { + const size_t pid { spmd<>::pid() }; + + grb::RC rc = set( smoothing_info.A_diagonal, system_generator.get_diag_value() ); if( rc != grb::SUCCESS ) { + if( pid == 0 ) { + std::cout << "error: " << __LINE__ << std::endl; + } return rc; } - times[ 1 ] = timer.time(); - MASTER_PRINT( pid, " time (ms) " << times[ 1 ] << std::endl ); - MASTER_PRINT( pid, log_prefix << "running coloring heuristics..." 
); - timer.reset(); + if( pid == 0 ) { + std::cout << "- running coloring heuristics..."; + } std::vector< coord_t > colors, color_counters; color_matrix_greedy( system_generator.get_generator(), colors, color_counters ); std::vector< std::vector< coord_t > > per_color_rows; split_rows_by_color( colors, color_counters.size(), per_color_rows ); if( rc != grb::SUCCESS ) { + if( pid == 0 ) { + std::cout << "error: " << __LINE__ << std::endl; + } return rc; } - times[ 2 ] = timer.time(); - MASTER_PRINT( pid, " found " << color_counters.size() << " colors, time (ms) " - << times[ 2 ] << std::endl ); - - - MASTER_PRINT( pid, log_prefix << "generating color masks..." ); - timer.reset(); - rc = build_static_color_masks( system_size, per_color_rows, system.color_masks ); - if( rc != grb::SUCCESS ) { - return rc; + if( pid == 0 ) { + std::cout <<"- found " << color_counters.size() << " colors," + << " generating color masks..."; } - times[ 3 ] = timer.time(); - MASTER_PRINT( pid, " time (ms) " << times[ 3 ] << std::endl ); - - return rc; + return internal::build_static_color_masks( system_generator.system_size(), + per_color_rows, smoothing_info.color_masks ); } /** - * @brief Generates an entire HPCG problem according to the parameters in \p params , storing it in \p holder . + * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and + * coarsening information. 
* - * @tparam DIMS dimensions of the system + * @tparam DIMS dimensions of the physical system * @tparam T type of matrix values - * @param holder std::unique_ptr to store the HPCG problem into - * @param params parameters container to build the HPCG problem - * @return grb::SUCCESS if every GraphBLAS operation (to generate vectors and matrices) succeeded, - * otherwise the first unsuccessful return value */ - template< std::size_t DIMS, typename T = double > - grb::RC build_hpcg_system( - std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > > & holder, - const hpcg_system_params< DIMS, T > & params - ) { - // n is the system matrix size - const std::size_t n { std::accumulate( params.physical_sys_sizes.cbegin(), - params.physical_sys_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - - grb::algorithms::hpcg_data< T, T, T > * data { new grb::algorithms::hpcg_data< T, T, T >( n ) }; - - assert( ! holder ); // should be empty - holder = std::unique_ptr< grb::algorithms::hpcg_data< T, T, T > >( data ); + template< size_t DIMS, typename T > + struct hpcg_system_params { + std::array< size_t, DIMS > physical_sys_sizes; + size_t halo_size; + T diag_value; + T non_diag_value; + size_t min_phys_size; + size_t max_levels; + size_t coarsening_step; + }; - // initialize the main (=uncoarsened) system matrix - grb::RC rc { grb::SUCCESS }; - const size_t pid { spmd<>::pid() }; - grb::utils::Timer timer; + template< + size_t DIMS, + typename coord_t, + typename T + > void build_hpcg_multigrid_generators( + const hpcg_system_params< DIMS, T > &params, + std::vector< grb::algorithms::HPCGBuilder< DIMS, coord_t, T > > &mg_generators + ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); - std::array< double, 4 > times; - MASTER_PRINT( pid, "\n-- main system" << std::endl ); - rc = build_base_system< DIMS, T, T, grb::algorithms::hpcg_data< T, T, T > >( *data, n, params.physical_sys_sizes, params.halo_size, - params.diag_value, params.non_diag_value, times ); - if( rc !=
grb::SUCCESS ) { - MASTER_PRINT( pid, " error: " << toString( rc ) ); - return rc; + size_t const current_size{ std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, + std::multiplies< size_t >() ) }; + if( current_size > std::numeric_limits< coord_t >::max() ) { + throw std::domain_error( "CoordT cannot store the matrix coordinates" ); + } + size_t min_physical_size { *std::min_element( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend() ) }; + if( min_physical_size < params.min_phys_size ) { + throw std::domain_error( "the initial system is too small" ); } - MASTER_PRINT( pid, "-- main system generation time (ms) " - "(system matrix,vectors,coloring,color masks):" << times[ 0 ] << "," << times[ 1 ] - << "," << times[ 2 ] << "," << times[ 3 ] << std::endl; - ); - // initialize coarsening with additional pointers and dimensions copies to iterate and divide - grb::algorithms::multi_grid_data< T, T > ** coarser = &data->coarser_level; - assert( *coarser == nullptr ); - std::array< std::size_t, DIMS > coarser_sizes; - std::array< std::size_t, DIMS > previous_sizes( params.physical_sys_sizes ); - std::size_t min_physical_coarsened_size { *std::min_element( previous_sizes.cbegin(), previous_sizes.cend() ) / params.coarsening_step }; - // coarsen system sizes into coarser_sizes - divide_array( coarser_sizes, previous_sizes, params.coarsening_step ); - std::size_t coarsening_level = 0UL; + std::array< coord_t, DIMS > coord_sizes; + // type-translate coordinates + std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), coord_sizes.begin() ); // generate linked list of hierarchical coarseners - while( min_physical_coarsened_size >= params.min_phys_size && coarsening_level < params.max_levels ) { - assert( *coarser == nullptr ); - // compute size of finer and coarser matrices - std::size_t coarser_size { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< 
std::size_t >() ) }; - std::size_t previous_size { std::accumulate( previous_sizes.cbegin(), previous_sizes.cend(), 1UL, std::multiplies< std::size_t >() ) }; - // build data structures for new level - grb::algorithms::multi_grid_data< T, T > * new_coarser { new grb::algorithms::multi_grid_data< double, double >( coarser_size, previous_size ) }; - // install coarser level immediately to cleanup in case of build error - *coarser = new_coarser; - - MASTER_PRINT( pid, "-- level " << coarsening_level << "\n -- generating coarsening matrix...\n" ); - timer.reset(); - // initialize coarsener matrix, system matrix and diagonal vector for the coarser level - rc = build_ndims_coarsener_matrix< DIMS >( new_coarser->coarsening_matrix, coarser_sizes, previous_sizes ); - if( rc != grb::SUCCESS ) { - MASTER_PRINT( pid, " error: " << toString( rc ) ); - return rc; - } - double coarsener_gen_time{ timer.time() }; + for( size_t coarsening_level = 0UL; + min_physical_size >= params.min_phys_size && coarsening_level <= params.max_levels; + coarsening_level++ ) { - rc = build_base_system< DIMS, T, T, grb::algorithms::multi_grid_data< T, T > >( *new_coarser, coarser_size, coarser_sizes, params.halo_size, - params.diag_value, params.non_diag_value, times ); - if( rc != grb::SUCCESS ) { - MASTER_PRINT( pid, " error: " << toString( rc ) ); - return rc; - } - MASTER_PRINT( pid, "-- level generation time (ms) " - "(level,coarsening matrix,system matrix,vectors,coloring,color masks):" - << coarsening_level << "," << coarsener_gen_time << "," << times[ 0 ] << "," << times[ 1 ] - << "," << times[ 2 ] << "," << times[ 3 ] << std::endl; - ); + // build generator + mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, params.non_diag_value ); // prepare for new iteration - coarser = &new_coarser->coarser_level; - min_physical_coarsened_size /= params.coarsening_step; - previous_sizes = coarser_sizes; - divide_array( coarser_sizes, coarser_sizes, params.coarsening_step ); - 
coarsening_level++; + min_physical_size /= params.coarsening_step; + std::for_each( coord_sizes.begin(), coord_sizes.end(), + [ ¶ms ]( coord_t &v ){ v /= params.coarsening_step; }); } - return rc; } } // namespace algorithms } // namespace grb -#ifdef INTERNAL_MASTER_PRINT -#undef INTERNAL_MASTER_PRINT -#undef MASTER_PRINT -#endif - -#endif // _H_GRB_ALGORITHMS_SYSTEM_BUILDING_UTILS +#endif // _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS diff --git a/include/graphblas/algorithms/multigrid/coarsener.hpp b/include/graphblas/algorithms/multigrid/coarsener.hpp new file mode 100644 index 000000000..47116c22a --- /dev/null +++ b/include/graphblas/algorithms/multigrid/coarsener.hpp @@ -0,0 +1,197 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Implementation of the coarsener of HPCG + * @date 2022-11-08 + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_COARSENER +#define _H_GRB_ALGORITHMS_HPCG_COARSENER + +#include +#include + +#include + +#include "multigrid_data.hpp" + +namespace grb { + namespace algorithms { + + template< + typename IOType, + typename NonzeroType + > + struct coarsening_data { + + grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size + ///< to coarsen an input vector of size #finer_size into a vector of size #system_size + grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size + + /** + * @brief Construct a new \c coarsening_data by initializing internal data structures + * @param[in] _finer_size size of the finer system, i.e. size of external objects \b before coarsening + * @param[in] coarser_size size of the current system, i.e. size \b after coarsening + */ + coarsening_data( size_t _finer_size, size_t coarser_size ) : + coarsening_matrix( coarser_size, _finer_size ), + Ax_finer( _finer_size ) {} + + grb::RC zero_temp_vectors() { + return grb::set( Ax_finer, 0 ); + } + }; + + namespace internal { + + /** + * @brief computes the coarser residual vector \p r_coarse by coarsening + * \p r_fine - \p coarsening_data.Ax_finer via \p coarsening_data.coarsening_matrix. + * + * The coarsening information is stored inside \p coarsening_data. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in] r_fine fine residual vector + * @param[in,out] coarsening_data \ref coarsening_data data structure storing the information for coarsening + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + template< + typename IOType, + typename NonzeroType, + class Ring, + class Minus + > grb::RC compute_coarsening( + const grb::Vector< IOType > & r_fine, // fine residual + grb::Vector< IOType > & r_coarse, // coarse residual (output) + coarsening_data< IOType, NonzeroType > & coarsening_data, + const Ring & ring, + const Minus & minus + ) { + RC ret { SUCCESS }; + // DBG_print_norm( coarsening_data.Ax_finer, "+++ Ax_finer before" ); + ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, coarsening_data.Ax_finer, + minus ); // Ax_finer = r_fine - Ax_finer + // DBG_print_norm( coarsening_data.Ax_finer, "+++ Ax_finer after" ); + assert( ret == SUCCESS ); + + // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 + // to *coarsening_data->system_size + ret = ret ? ret : grb::set( r_coarse, 0 ); + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( r_coarse, coarsening_data.coarsening_matrix, + coarsening_data.Ax_finer, ring ); // r_coarse = coarsening_matrix * Ax_finer + // DBG_print_norm( r_coarse, "+++ r_coarse" ); + return ret; + } + + /** + * @brief computes the prolongation of the coarser solution \p z_coarse and adds it to + * \p x_fine. + * + * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * + * @param[in,out] x_fine the solution vector the prolonged solution is added to + * @param[in,out] coarsening_data information for coarsening + * @param[in] ring the ring to perform the operations on + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + template< + typename IOType, + typename NonzeroType, + class Ring + > grb::RC compute_prolongation( + const grb::Vector< IOType > & z_coarse, + grb::Vector< IOType > & x_fine, // fine solution, updated in place + grb::algorithms::coarsening_data< IOType, NonzeroType > & coarsening_data, + const Ring & ring + ) { + RC ret { SUCCESS }; + // actual refining, from *coarsening_data->system_size == nrows(*coarsening_data->A) / 8 + // to nrows(x_fine) + ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); + + ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix | grb::descriptors::dense >( + coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, z_coarse, ring ); + assert( ret == SUCCESS ); + + ret = ret ? 
ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; + assert( ret == SUCCESS ); + return ret; + } + + } // namespace internal + + template< + typename IOType, + typename NonzeroType, + class Ring, + class Minus + > struct single_point_coarsener { + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + + using MultiGridInputType = multigrid_data< IOType, NonzeroType >; + + // default value: override with your own + std::vector< std::unique_ptr< grb::algorithms::coarsening_data< IOType, NonzeroType > > > coarsener_levels; + Ring ring; + Minus minus; + + + // single_point_coarsener() = default; + + inline grb::RC coarsen_residual( + const MultiGridInputType &finer, + MultiGridInputType &coarser + ) { + // first compute the residual + coarsening_data< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; + grb::RC ret = grb::set( coarsener.Ax_finer, 0 ); + ret = ret ? 
ret : grb::mxv< grb::descriptors::dense >( coarsener.Ax_finer, finer.A, finer.z, ring ); + // DBG_print_norm( coarsener.Ax_finer, "temp Axf" ); + return ret ? ret : internal::compute_coarsening( finer.r, coarser.r, coarsener, ring, minus ); // propagate the first failure of set()/mxv() + } + + inline grb::RC prolong_solution( + const MultiGridInputType &coarser, + MultiGridInputType &finer + ) { + return internal::compute_prolongation( coarser.z, finer.z, *coarsener_levels[ finer.level ], ring ); + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_HPCG_COARSENER diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp new file mode 100644 index 000000000..714555426 --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -0,0 +1,56 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS +#define _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS + +namespace grb { + namespace algorithms { + + template< + typename MGInfoType, + typename CoarsenerInfoType, + typename SmootherInfoType + > void allocate_multigrid_data( + const std::vector< size_t > &mg_sizes, + std::vector< std::unique_ptr< MGInfoType > > &system_levels, + std::vector< std::unique_ptr< CoarsenerInfoType > > &coarsener_levels, + std::vector< std::unique_ptr< SmootherInfoType > > &smoother_levels + ) { + if( mg_sizes.size() == 0 ) { + throw std::invalid_argument( "at least one size should be available" ); + } + size_t finer_size = mg_sizes[ 0 ]; + system_levels.push_back( std::unique_ptr< MGInfoType >( new MGInfoType( 0, finer_size ) ) ); // create main system; owned before the vector may reallocate and throw + smoother_levels.push_back( std::unique_ptr< SmootherInfoType >( new SmootherInfoType( finer_size ) ) ); // create smoother for main + for( size_t i = 1; i < mg_sizes.size(); i++ ) { + size_t coarser_size = mg_sizes[ i ]; + coarsener_levels.push_back( std::unique_ptr< CoarsenerInfoType >( new CoarsenerInfoType( finer_size, coarser_size ) ) ); + system_levels.push_back( std::unique_ptr< MGInfoType >( new MGInfoType( i, coarser_size ) ) ); + smoother_levels.push_back( std::unique_ptr< SmootherInfoType >( new SmootherInfoType( coarser_size ) ) ); + finer_size = coarser_size; + } + } + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp new file mode 100644 index 000000000..2ac3c0770 --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -0,0 +1,360 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file multigrid_cg.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief File with the main routine to run a full HPCG simulation, comprising multi-grid runs + * with Red-Black Gauss-Seidel smoothing. + * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_CG +#define _H_GRB_ALGORITHMS_MULTIGRID_CG + +#include +#include + +#include + +#include "multigrid_data.hpp" + +#include + + +namespace grb { + namespace algorithms { + + /** + * @brief Data structure to store the data for a full HPCG run: system vectors and matrix, + * coarsening information and temporary vectors. + * + * This data structure contains all the needed vectors and matrices to solve a linear system + * \f$ A x = b \f$. As for \ref system_data, internal elements are built and their sizes properly initialized + * to #system_size, but internal values are \b not initialized, as they are left to user's logic. + * Similarly, the coarsening information in #coarser_level is to be initialized by users by properly + * building a \code multigrid_data \endcode object and storing its pointer into + * #coarser_level; on destruction, #coarser_level will also be properly destroyed without + * user's intervention. 
+ * + * @tparam IOType type of values of the vectors for intermediate results + * @tparam NonzeroType type of the values stored inside the system matrix #A + * @tparam InputType type of the values of the right-hand side vector #b + */ + template< + typename IOType, + typename NonzeroType, + typename InputType + > struct mg_cg_data { + + grb::Vector< InputType > b; ///< right-side vector of known values + grb::Vector< IOType > u; ///< temporary vector (mg_cg stores the product A*p here) + grb::Vector< IOType > p; ///< CG direction vector (mg_cg initializes it from the pre-conditioned residual) + grb::Vector< IOType > x; ///< system solution being refined over the iterations: it is up to the user + ///< to set the initial solution value + + + /** + * @brief Construct a new \c mg_cg_data object by building the internal vectors + * #b, #u, #p and #x, all of size \p sys_size. + * + * @param[in] sys_size the size of the simulated system, i.e. of all the internal vectors + */ + mg_cg_data( size_t sys_size ) : + b( sys_size ), + u( sys_size ), + p( sys_size ), + x( sys_size ) {} + + grb::RC zero_temp_vectors() { + grb::RC rc = grb::set( u, 0 ); + rc = rc ? rc : grb::set( p, 0 ); + return rc; + } + }; + + template < + typename IOType, + typename ResidualType, + class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, + class Minus = operators::subtract< IOType > + > + struct cg_options { + bool with_preconditioning; + size_t max_iterations; + ResidualType tolerance; + bool print_iter_stats; + Ring ring; + Minus minus; + }; + + + template < typename ResidualType > struct cg_out_data { + size_t iterations; + ResidualType norm_residual; + }; + + /** + * @brief High-Performance Conjugate Gradient algorithm implementation running entirely on GraphBLAS. + * + * Finds the solution x of an \f$ A x = b \f$ algebraic system by running the HPCG algorithm. 
+ * The implementation here closely follows the reference HPCG benchmark used for the HPCG500 rank, + * visible at https://github.com/hpcg-benchmark/hpcg. + * The only difference is the usage of a Red-Black Gauss-Seidel smoother instead of the standard one + * for performance reasons, as the standard Gauss-Seidel algorithm is inherently sequential and not + * expressible in terms of standard linear algebra operations. + * In particular, this implementation (like the standard one) couples a standard CG algorithm with a V-cycle + * multi-grid solver to initially refine the tentative solution. This refinement step depends on the + * availability of coarsening information, which should be stored inside \p data; otherwise, + * the refinement is not performed and only the CG algorithm is run. For more information on inputs + * and on coarsening information, you may consult the \ref hpcg_data class documentation. + * + * This implementation assumes that the vectors and matrices inside \p data are all correctly initialized + * and populated with the proper values; in particular + * - hpcg_data#x with the initial tentative solution (iterative solutions are also stored here) + * - hpcg_data#A with the system matrix + * - hpcg_data#b with the right-hand side vector \f$ b \f$ + * - hpcg_data#A_diagonal with the diagonal values of the matrix + * - hpcg_data#color_masks with the color masks for this level + * - hpcg_data#coarser_level with the information for the coarser multi-grid run (if any) + * The other vectors are assumed to be initialized (via the usual grb::Vector#Vector(size_t) constructor) + * but not necessarily populated with values, as they are internally populated when needed; hence, + * any previous values are overwritten. + * + * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning + * the failure code. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam ResidualType type of the residual norm + * @tparam NonzeroType type of matrix values + * @tparam InputType type of values of the right-hand side vector b + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in,out] data \ref hpcg_data object storing inputs, outputs and temporary vectors used for the computation, + * as well as the information for the recursive multi-grid runs + * @param[in] with_preconditioning whether to use pre-conditioning, i.e. to perform multi-grid runs + * @param[in] presmoother_steps number of pre-smoother steps, for multi-grid runs + * @param[in] postsmoother_steps number of post-smoother steps, for multi-grid runs + * @param[in] max_iterations maximum number of iterations the simulation may run for; once reached, + * the simulation stops even if the residual norm is above \p tolerance + * @param[in] tolerance the tolerance over the residual norm, i.e. 
the value of the residual norm to stop + * the simulation at + * @param[out] iterations numbers of iterations performed + * @param[out] norm_residual norm of the final residual + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + template< + typename IOType, + typename ResidualType, + typename NonzeroType, + typename InputType, + typename MultiGridrunnerType, + class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, + class Minus = operators::subtract< IOType > + > grb::RC mg_cg( + multigrid_data< IOType, NonzeroType > &grid_base, + mg_cg_data< IOType, NonzeroType, InputType > &data, + const cg_options< IOType, ResidualType, Ring, Minus > &cg_opts, + MultiGridrunnerType &multigrid_runner, + cg_out_data< ResidualType > &out_data + ) { + ResidualType alpha; + + const grb::Matrix< NonzeroType > &A { grid_base.A }; + grb::Vector< IOType > &r { grid_base.r }; // residual vector + grb::Vector< IOType > &z { grid_base.z }; // pre-conditioned residual vector + grb::Vector< IOType > &x { data.x }; + const grb::Vector< InputType > &b { data.b }; + grb::Vector< IOType > &p { data.p }; // direction vector + grb::Vector< IOType > &Ap { data.u }; // temp vector + grb::RC ret { SUCCESS }; + + ret = ret ? ret : grb::set( Ap, 0 ); + ret = ret ? ret : grb::set( r, 0 ); + ret = ret ? ret : grb::set( p, 0 ); + + ret = ret ? ret : grb::set( p, x ); + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, cg_opts.ring ); // Ap = A * x + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::eWiseApply( r, b, Ap, cg_opts.minus ); // r = b - Ap; + assert( ret == SUCCESS ); + + ResidualType norm_residual = cg_opts.ring.template getZero< ResidualType >(); + ret = ret ? 
ret : grb::dot( norm_residual, r, r, cg_opts.ring ); // norm_residual = r' * r; + assert( ret == SUCCESS ); + + // compute sqrt to avoid underflow + norm_residual = std::sqrt( norm_residual ); + + // initial norm of residual + out_data.norm_residual = norm_residual; + const ResidualType norm_residual_initial { norm_residual }; + ResidualType old_r_dot_z { 0.0 }, r_dot_z { 0.0 }, beta { 0.0 }; + size_t iter { 0 }; + + grb::utils::Timer timer; + +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( p, "start p" ); + DBG_print_norm( Ap, "start Ap" ); + DBG_print_norm( r, "start r" ); +#endif + + do { +#ifdef HPCG_PRINT_STEPS + DBG_println( "========= iteration " << iter << " =========" ); +#endif + if( cg_opts.with_preconditioning ) { + if( cg_opts.print_iter_stats ) { + timer.reset(); + } + ret = ret ? ret : multigrid_runner( grid_base ); + assert( ret == SUCCESS ); + if( cg_opts.print_iter_stats ) { + double duration = timer.time(); + std::cout << "iteration, pre-conditioner: " << iter << "," + << duration << std::endl; + } + } else { + ret = ret ? ret : grb::set( z, r ); // z = r; + assert( ret == SUCCESS ); + } +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( z, "initial z" ); +#endif + + ResidualType pAp; + + if( iter == 0 ) { + ret = ret ? ret : grb::set( p, z ); // p = z; + assert( ret == SUCCESS ); + + ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); // r_dot_z = r' * z; + assert( ret == SUCCESS ); + } else { + old_r_dot_z = r_dot_z; + + r_dot_z = cg_opts.ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); // r_dot_z = r' * z; + assert( ret == SUCCESS ); + + beta = r_dot_z / old_r_dot_z; + ret = ret ? ret : grb::clear( Ap ); // Ap = 0; + ret = ret ? ret : grb::eWiseMulAdd( Ap, beta, p, z, cg_opts.ring ); // Ap += beta * p + z; + std::swap( Ap, p ); // p = Ap; + assert( ret == SUCCESS ); + } +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( p, "middle p" ); +#endif + + ret = ret ? ret : grb::set( Ap, 0 ); + ret = ret ? 
ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, cg_opts.ring ); // Ap = A * p; + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( Ap, "middle Ap" ); +#endif + pAp = cg_opts.ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( pAp, Ap, p, cg_opts.ring ); // pAp = p' * Ap + assert( ret == SUCCESS ); + + alpha = r_dot_z / pAp; + + ret = ret ? ret : grb::eWiseMul( x, alpha, p, cg_opts.ring ); // x += alpha * p; + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( x, "end x" ); +#endif + + ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, cg_opts.ring ); // r += - alpha * Ap; + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( r, "end r" ); +#endif + + norm_residual = cg_opts.ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( norm_residual, r, r, cg_opts.ring ); // residual = r' * r; + assert( ret == SUCCESS ); + + norm_residual = std::sqrt( norm_residual ); + + if( cg_opts.print_iter_stats ) { + std::cout << "iteration, residual: " << iter << "," << norm_residual << std::endl; + } + + ++iter; + out_data.iterations = iter; + out_data.norm_residual = norm_residual; + } while( iter < cg_opts.max_iterations && + norm_residual / norm_residual_initial > cg_opts.tolerance && ret == SUCCESS ); + + return ret; + } + + + + + template< + typename IOType, + typename NonzeroType, + typename InputType, + typename ResidualType, + typename MultiGridRunnerType, + class Ring, + class Minus + + > struct mg_cg_runner { + + using HPCGInputType = mg_cg_data< IOType, NonzeroType, InputType >; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + // static_assert( std::is_copy_constructible< MultiGridRunnerType >::value, + // "cannot construct the Multi-Grid runner by copy" ); + static_assert( 
std::is_move_constructible< MultiGridRunnerType >::value, + "cannot construct the Multi-Grid runner by move" ); + + // default value: override with your own + cg_options< IOType, ResidualType, Ring, Minus > cg_opts{ true, 10, 0.0, false, Ring(), Minus() }; + + MultiGridRunnerType mg_runner; + + mg_cg_runner( + MultiGridRunnerType &&_mg_runner + ) : mg_runner( std::move( _mg_runner ) ) {} + + inline grb::RC operator()( + typename MultiGridRunnerType::MultiGridInputType &grid_base, + mg_cg_data< IOType, NonzeroType, InputType > &data, + cg_out_data< ResidualType > &out_data + ) { + return mg_cg( grid_base, data, cg_opts, mg_runner, out_data ); + } + + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_CG diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp new file mode 100644 index 000000000..e76063aec --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -0,0 +1,105 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file hpcg_data.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Data structures to store HPCG input/output data. 
+ * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_DATA +#define _H_GRB_ALGORITHMS_HPCG_DATA + +#include +#include + +#include + + +namespace grb { + + namespace algorithms { + + /** + * @brief Data container for all multi-grid inputs and outputs. + * + * @tparam IOType Type of values of the vectors for intermediate results + * @tparam NonzeroType Type of the values stored inside the system matrix \p A + * and the coarsening matrix #Ax_finer + * + * This data structure stores information for a full multi-grid V cycle, i.e. + * - input and output vectors for solution, residual and temporary vectors + * - coarsening information, in particular the #coarsening_matrix that + * coarsens a larger system of size #finer_size to the current system + * of size #system_size + * - the next level of coarsening, pointed to by #coarser_level, possibly being \c nullptr + * if no further coarsening is desired; note that this information is automatically + * destructed on object destruction (if any) + * + * Vectors stored here refer to the \b coarsened system (with the exception of #Ax_finer), + * thus having size #system_size; this also holds for the system matrix #A, + * while #coarsening_matrix has size #system_size \f$ \times \f$ #finer_size. + * Hence, the typical usage of this data structure is to coarsen \b external vectors, e.g. vectors + * coming from another \code multigrid_data \endcode object whose #system_size equals + * \code this-> \endcode #finer_size, via \code this-> \endcode #coarsening_matrix and store the coarsened + * vectors internally. Mimicking the recursive behavior of standard multi-grid simulations, + * the information for a further coarsening is stored inside #coarser_level, so that the + * hierarchy of coarsened levels is reflected inside this data structure. + * + * As for \ref system_data, internal vectors and matrices are initialized to the proper size, + * but their values are \b not initialized. 
+ */ + template< + typename IOType, + typename NonzeroType + > struct multigrid_data { + + const size_t level; ///< index of this level in the multi-grid hierarchy (allocate_multigrid_data assigns 0 to the main system) + const size_t system_size; ///< size of the system, i.e. side length of #A + grb::Matrix< NonzeroType > A; ///< system matrix + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual + + multigrid_data( + size_t _level, + size_t sys_size + ) : + level( _level ), + system_size( sys_size ), + A( sys_size, sys_size ), + z( sys_size ), + r( sys_size ) {} + + // for safety, disable copy semantics + multigrid_data( const multigrid_data< IOType, NonzeroType > & o ) = delete; + + multigrid_data & operator=( const multigrid_data< IOType, NonzeroType > & ) = delete; + + grb::RC zero_temp_vectors() { + grb::RC rc = grb::set( z, 0 ); + rc = rc ? rc : grb::set( r, 0 ); + return rc; + } + }; + + } // namespace algorithms + +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_HPCG_DATA + diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp new file mode 100644 index 000000000..77b785e2d --- /dev/null +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -0,0 +1,237 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file multigrid_v_cycle.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief This file contains the routines for multi-grid solution refinement, including the main routine + * and those for coarsening and refinement of the tentative solution. + * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE +#define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE + +#include +#include +#include +#include +#include + +#include + +#include + +#include "multigrid_data.hpp" + +namespace grb { + namespace algorithms { + /** + * @brief Namespace for interfaces that should not be used outside of the algorithm namespace. + */ + namespace internal { + + + + } // namespace internal + + /** + * @brief Multi-grid V cycle implementation to refine a given solution. + * + * A full multi-grid run goes through the following steps: + * -# if \p presmoother_steps \f$ > 0 \f$, \p presmoother_steps of the Red-Black Gauss-Seidel smoother are run + * to improve on the initial solution stored into \p data.z + * -# the coarsening of \f$ r - A*z \f$ is computed to find the coarser residual vector + * -# a multi-grid run is recursively performed on the coarser system + * -# the tentative solution from the coarser multi-grid run is prolonged and added to the current tentative solution + * into \p data.z + * -# this solution is further smoothed for \p postsmoother_steps steps + * + * If coarsening information is not available, the multi-grid run consists in a single smmothing run. + * + * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning + * the failure code. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param[in,out] data \ref multigrid_data object storing the relevant data for the multi-grid run of the current + * clevel + * @param[in,out] coarsening_data pointer to information for the coarsening/refinement operations and for the + * recursive multi-grid run on the coarsened system; if \c nullptr, no coarsening/refinement occurs + * and only smoothing occurs on the current solution + * @param[in] presmoother_steps number of pre-smoother steps + * @param[in] postsmoother_steps number of post-smoother steps + * @param[in] ring the ring to perform the operations on + * @param[in] minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + template< + typename IOType, + typename NonzeroType, + typename MGSysIterType, + typename MGSmootherType, + typename CoarsenerType, + class Ring, + class Minus + > grb::RC multi_grid( + MGSysIterType mgiter_begin, + const MGSysIterType mgiter_end, + MGSmootherType &smoother, + CoarsenerType &coarsener, + const Ring &ring, + const Minus &minus + ) { + static_assert( std::is_base_of< multigrid_data< IOType, NonzeroType >, + typename std::decay< decltype( *mgiter_begin ) >::type >::value, "the iterator type MGSysIterType" + " must reference an object of type multigrid_data< IOType, NonzeroType >" ); + + RC ret { SUCCESS }; + assert( mgiter_begin != mgiter_end ); + multigrid_data< IOType, NonzeroType > &finer_system = *mgiter_begin; + ++mgiter_begin; + +#ifdef HPCG_PRINT_STEPS + DBG_println( "mg BEGINNING {" ); +#endif + + + // clean destination vector + ret = ret ? 
ret : grb::set( finer_system.z, 0 ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( finer_system.r, "initial r" ); +#endif + if( !( mgiter_begin != mgiter_end ) ) { + // compute one round of Gauss Seidel and return + ret = ret ? ret : smoother.nonrecursive_smooth( finer_system ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( finer_system.z, "smoothed z" ); + DBG_println( "} mg END" ); +#endif + return ret; + } + multigrid_data< IOType, NonzeroType > &coarser_system = *mgiter_begin; + + // pre-smoother + ret = ret ? ret : smoother.pre_smooth( finer_system ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( finer_system.z, "pre-smoothed z" ); +#endif + + ret = ret ? ret : coarsener.coarsen_residual( finer_system, coarser_system ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( coarser_system.r, "coarse r" ); +#endif + + ret = ret ? ret : multi_grid< IOType, NonzeroType, MGSysIterType, + MGSmootherType, CoarsenerType, Ring, Minus >( mgiter_begin, mgiter_end, + smoother, coarsener, ring, minus ); + assert( ret == SUCCESS ); + + ret = ret ? ret : coarsener.prolong_solution( coarser_system, finer_system ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( finer_system.z, "prolonged z" ); +#endif + + // post-smoother + ret = ret ? 
ret : smoother.post_smooth( finer_system ); + assert( ret == SUCCESS ); +#ifdef HPCG_PRINT_STEPS + DBG_print_norm( finer_system.z, "post-smoothed z" ); + DBG_println( "} mg END" ); +#endif + + return ret; + } + + template< + typename IOType, + typename NonzeroType, + typename InputType, + typename MGSmootherType, + typename CoarsenerType, + class Ring, + class Minus + > struct multigrid_runner { + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + static_assert( std::is_move_constructible< MGSmootherType >::value, + "MGSmootherType must be move-constructible"); + static_assert( std::is_move_constructible< CoarsenerType >::value, + "CoarsenerType must be move-constructible"); + + using MultiGridInputType = multigrid_data< IOType, NonzeroType >; + + // check the interface between HPCG and MG match + static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, + MultiGridInputType >::value, "input type of the Smoother kernel must match the input from Multi-Grid" ); + + MGSmootherType smoother_runner; + CoarsenerType coarsener_runner; + std::vector< std::unique_ptr< MultiGridInputType > > system_levels; + Ring ring; + Minus minus; + + struct Extractor { + MultiGridInputType & operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::reference &ref + ) { + return *ref.get(); + } + + const MultiGridInputType & operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::const_reference &ref + ) const { + return *ref.get(); + } + }; + + using UniquePtrExtractor = grb::utils::IteratorValueAdaptor< + typename std::vector< std::unique_ptr< MultiGridInputType > >::iterator, + Extractor + >; + + + multigrid_runner( + MGSmootherType &&_smoother_runner, + CoarsenerType &&_coarsener_runner + ) : smoother_runner( std::move( _smoother_runner 
) ), + coarsener_runner( std::move( _coarsener_runner ) ) {} + + inline grb::RC operator()( + MultiGridInputType &system + ) { + return multi_grid< IOType, NonzeroType, UniquePtrExtractor, MGSmootherType, CoarsenerType, Ring, Minus >( + UniquePtrExtractor( system_levels.begin() += system.level ), UniquePtrExtractor( system_levels.end() ), + smoother_runner, coarsener_runner, ring, minus ); + } + }; + + } // namespace algorithms +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE diff --git a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp similarity index 57% rename from include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp rename to include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 6fdc3c9a3..615b4340b 100644 --- a/include/graphblas/algorithms/hpcg/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -29,8 +29,31 @@ #include +#include "multigrid_data.hpp" + namespace grb { namespace algorithms { + + template< typename IOType > struct smoother_data { + + grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A + grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results + std::vector< grb::Vector< bool > > color_masks; ///< for color masks + + smoother_data( size_t sys_size ) : + A_diagonal( sys_size ), + smoother_temp( sys_size ) { } + + // for safety, disable copy semantics + smoother_data( const smoother_data & o ) = delete; + + smoother_data & operator=( const smoother_data & ) = delete; + + grb::RC zero_temp_vectors() { + return grb::set( smoother_temp, 0 ); + } + }; + namespace internal { /** @@ -50,14 +73,19 @@ namespace grb { * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC __rbgs_single_step( 
const grb::Matrix< NonzeroType > & A, + template< + typename IOType, + typename NonzeroType, + class Ring + > grb::RC rbgs_single_step( + const grb::Matrix< NonzeroType > & A, const grb::Vector< IOType > & A_diagonal, const grb::Vector< IOType > & r, grb::Vector< IOType > & x, grb::Vector< IOType > & smoother_temp, const grb::Vector< bool > & color_mask, - const Ring & ring ) { + const Ring & ring + ) { RC ret { SUCCESS }; ret = ret ? ret : grb::set( smoother_temp, 0 ); @@ -105,23 +133,100 @@ namespace grb { * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< typename IOType, typename NonzeroType, class Ring > - grb::RC red_black_gauss_seidel( system_data< IOType, NonzeroType > & data, const Ring & ring ) { + template< + typename IOType, + typename NonzeroType, + class Ring + > grb::RC red_black_gauss_seidel( + multigrid_data< IOType, NonzeroType > &data, + smoother_data< IOType > &smoothing_info, + const Ring & ring + ) { RC ret { SUCCESS }; // forward step - std::vector< grb::Vector< bool > >::const_iterator end { data.color_masks.cend() }; - for( std::vector< grb::Vector< bool > >::const_iterator it { data.color_masks.cbegin() }; it != end && ret == SUCCESS; ++it ) { - ret = ret ? 
ret : __rbgs_single_step( data.A, data.A_diagonal, data.r, data.z, data.smoother_temp, *it, ring ); + std::vector< grb::Vector< bool > >::const_iterator end { smoothing_info.color_masks.cend() }; + for( std::vector< grb::Vector< bool > >::const_iterator it { + smoothing_info.color_masks.cbegin() }; it != end && ret == SUCCESS; ++it ) { + ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, + smoothing_info.smoother_temp, *it, ring ); } // backward step - std::vector< grb::Vector< bool > >::const_reverse_iterator rend { data.color_masks.crend() }; - for( std::vector< grb::Vector< bool > >::const_reverse_iterator rit { data.color_masks.crbegin() }; rit != rend && ret == SUCCESS; ++rit ) { - ret = ret ? ret : __rbgs_single_step( data.A, data.A_diagonal, data.r, data.z, data.smoother_temp, *rit, ring ); + std::vector< grb::Vector< bool > >::const_reverse_iterator rend { smoothing_info.color_masks.crend() }; + for( std::vector< grb::Vector< bool > >::const_reverse_iterator rit { + smoothing_info.color_masks.crbegin() }; rit != rend && ret == SUCCESS; ++rit ) { + ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, + smoothing_info.smoother_temp, *rit, ring ); } return ret; } } // namespace internal + + template < + typename IOType, + typename NonzeroType, + class Ring + > struct red_black_smoother_runner { + size_t presmoother_steps ; + size_t postsmoother_steps; + size_t non_recursive_smooth_steps; + std::vector< std::unique_ptr< smoother_data< IOType > > > levels; + Ring ring; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring operator with default values" ); + + using SmootherInputType = multigrid_data< IOType, NonzeroType >; + + inline grb::RC pre_smooth( + SmootherInputType& data + ) { + return run_smoother( data, presmoother_steps ); + } + + inline grb::RC post_smooth( + SmootherInputType& data + ) { + return run_smoother( data, postsmoother_steps ); + } + + inline grb::RC 
nonrecursive_smooth( + SmootherInputType& data + ) { + return run_smoother( data, non_recursive_smooth_steps ); + } + + /** + * @brief Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, with inputs and outputs stored + * inside \p data. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators zero-values + * + * @param[in,out] data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, + * residual, system matrix colors, temporary vectors + * @param[in] smoother_steps how many smoothing steps to run + * @param[in] ring the ring to perform the operations on + * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC run_smoother( + SmootherInputType &data, + const size_t smoother_steps + ) { + RC ret { SUCCESS }; + + smoother_data< IOType > &smoothing_info = *( levels.at( data.level ).get() ); + + for( size_t i { 0 }; i < smoother_steps && ret == SUCCESS; i++ ) { + ret = ret ? ret : internal::red_black_gauss_seidel( data, smoothing_info, ring ); + assert( ret == SUCCESS ); + } + return ret; + } + }; + } // namespace algorithms } // namespace grb diff --git a/include/graphblas/utils/geometry/array_vector_storage.hpp b/include/graphblas/utils/geometry/array_vector_storage.hpp index 451364754..45fbab04e 100644 --- a/include/graphblas/utils/geometry/array_vector_storage.hpp +++ b/include/graphblas/utils/geometry/array_vector_storage.hpp @@ -1,67 +1,100 @@ -#ifndef _ARRAY_VECTOR_STORAGE_H_ -#define _ARRAY_VECTOR_STORAGE_H_ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file array_vector_storage.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Extension of std::array<> exposing a larger interface and the underlying + * storage structure. + * + * @date 2022-10-24 + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE +#define _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE #include #include #include +#include namespace grb { namespace utils { namespace geometry { -template< typename T, std::size_t DIMS > class array_vector_storage: public std::array< T, DIMS > { - -public: - - using vector_storage = std::array< T, DIMS >&; - using const_vector_storage = const std::array< T, DIMS >&; - - array_vector_storage( std::size_t _dimensions ) { - static_assert( DIMS > 0, "cannot allocate 0-sized array" ); - if( _dimensions != DIMS ) { - throw std::invalid_argument("given dimensions must match the type dimensions"); - } - } - - array_vector_storage() = delete; - - // only copy constructor/assignment, since there's no external storage - array_vector_storage( const array_vector_storage< T, DIMS >& o ) noexcept { - std::copy_n( o.cbegin(), DIMS, this->begin() ); - } - - /* - array_vector_storage( array_vector_storage< T >&& o ) { - std::copy_n( o._storage.cbegin(), DIMS, this->_storage.cbegin() ); - } - */ - - array_vector_storage< T, DIMS >& operator=( const array_vector_storage< T, DIMS > &original ) noexcept { - std::copy_n( original.begin(), DIMS, this->begin() ); - return *this; - } - - //array_vector_storage< T, DIMS >& operator=( array_vector_storage< T, DIMS > &&original ) = 
delete; - - ~array_vector_storage() {} - - constexpr std::size_t dimensions() const { - return DIMS; - } - - inline vector_storage storage() { - return *this; - } - - inline const_vector_storage storage() const { - return *this; - } - -}; + /** + * Array with fixed size based on std::array with an interface compliant to what other classes + * in the geometry namespace expect, like storage() and dimensions() methods. + * + * It describes a vector of dimensions #dimensions(). + * + * @tparam DataType the data type of the vector elements + * @tparam DIMS the dimensions of the vector + */ + template< + typename DataType, + size_t DIMS + > class ArrayVectorStorage: public std::array< DataType, DIMS > { + + public: + + using VectorStorageType = std::array< DataType, DIMS >&; + using ConstVectorStorageType = const std::array< DataType, DIMS >&; + + ArrayVectorStorage( size_t _dimensions ) { + static_assert( DIMS > 0, "cannot allocate 0-sized array" ); + if( _dimensions != DIMS ) { + throw std::invalid_argument("given dimensions must match the type dimensions"); + } + } + + ArrayVectorStorage() = delete; + + // only copy constructor/assignment, since there's no external storage + ArrayVectorStorage( const ArrayVectorStorage< DataType, DIMS > &o ) noexcept { + std::copy_n( o.cbegin(), DIMS, this->begin() ); + } + + ArrayVectorStorage( ArrayVectorStorage< DataType, DIMS > &&o ) = delete; + + ArrayVectorStorage< DataType, DIMS >& operator=( + const ArrayVectorStorage< DataType, DIMS > &original + ) noexcept { + std::copy_n( original.begin(), DIMS, this->begin() ); + return *this; + } + + ArrayVectorStorage< DataType, DIMS >& operator=( ArrayVectorStorage< DataType, DIMS > &&original ) = delete; + + constexpr size_t dimensions() const { + return DIMS; + } + + inline VectorStorageType storage() { + return *this; + } + + inline ConstVectorStorageType storage() const { + return *this; + } + }; } // namespace geometry } // namespace utils } // namespace grb -#endif // 
_ARRAY_VECTOR_STORAGE_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE diff --git a/include/graphblas/utils/geometry/dynamic_vector_storage.hpp b/include/graphblas/utils/geometry/dynamic_vector_storage.hpp new file mode 100644 index 000000000..a0def1980 --- /dev/null +++ b/include/graphblas/utils/geometry/dynamic_vector_storage.hpp @@ -0,0 +1,154 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE +#define _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE + +#include +#include +#include + +/** + * @file dynamic_vector_storage.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Extension of a heap-allocated array exposing the underlying storage and iterators. + * + * @date 2022-10-24 + */ + +namespace grb { + namespace utils { + namespace geometry { + + /** + * Array with fixed size (i.e. decided at object creation) allocated on the heap with an interface compliant + * to what other classes in the geometry namespace expect, like storage() and dimensions() methods. + * + * It describes a vector of dimensions #dimensions(). 
+ * + * @tparam DataType the data type of the vector elements + */ + template< typename DataType > class DynamicVectorStorage { + + size_t _dimensions; + DataType* _storage; + + void clean() { + if( this->_storage != nullptr ) { + delete[] this->_storage; + } + } + + public: + + // iterator fields + using reference = DataType&; + using const_reference = const DataType&; + using iterator = DataType*; + using const_iterator = const DataType*; + using pointer = DataType*; + using const_pointer = const DataType*; + + using VectorStorageType = DataType*; + using ConstVectorStorageType = DataType*; + using SelfType = DynamicVectorStorage< DataType >; + + DynamicVectorStorage( size_t __dimensions ): + _dimensions( __dimensions ) { + if( __dimensions == 0 ) { + throw std::invalid_argument("dimensions cannot be 0"); + } + this->_storage = new DataType[ __dimensions ]; + } + + DynamicVectorStorage() = delete; + + DynamicVectorStorage( const SelfType &o ): + _dimensions( o._dimensions ), + _storage( new DataType[ o._dimensions ] ) + { + std::copy_n( o._storage, o._dimensions, this->_storage ); + } + + DynamicVectorStorage( SelfType &&o ) = delete; + + SelfType& operator=( const SelfType &original ) { + if( original._dimensions != this->_dimensions ) { + this->clean(); + this->_storage = new DataType[ original._dimensions]; + } + this->_dimensions = original._dimensions; + std::copy_n( original._storage, original._dimensions, this->_storage ); + return *this; + } + + SelfType& operator=( SelfType &&original ) = delete; + + ~DynamicVectorStorage() { + this->clean(); + } + + size_t dimensions() const { + return this->_dimensions; + } + + inline iterator begin() { + return this->_storage; + } + + inline iterator end() { + return this->_storage + this->_dimensions; + } + + inline const_iterator begin() const { + return this->_storage; + } + + inline const_iterator end() const { + return this->_storage + this->_dimensions; + } + + inline const_iterator cbegin() const { + return 
this->_storage; + } + + inline const_iterator cend() const { + return this->_storage + this->_dimensions; + } + + inline VectorStorageType storage() { + return this->_storage; + } + + inline ConstVectorStorageType storage() const { + return this->_storage; + } + + inline reference operator[]( size_t pos ) { + return *( this->_storage + pos); + } + + inline const_reference operator[]( size_t pos ) const { + return *( this->_storage + pos ); + } + }; + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE diff --git a/include/graphblas/utils/geometry/generic_vector_storage.hpp b/include/graphblas/utils/geometry/generic_vector_storage.hpp deleted file mode 100644 index 166dad3b8..000000000 --- a/include/graphblas/utils/geometry/generic_vector_storage.hpp +++ /dev/null @@ -1,117 +0,0 @@ - -#ifndef _GENERIC_VECTOR_STORAGE_H_ -#define _GENERIC_VECTOR_STORAGE_H_ - -#include -#include - -namespace grb { - namespace utils { - namespace geometry { - -template< typename T > class generic_vector_storage { - - std::size_t _dimensions; - T* _storage; - - void clean() { - if( this->_storage != nullptr ) { - delete[] this->_storage; - } - } - -public: - - using reference = T&; - using const_reference = const T&; - using iterator = T*; - using const_iterator = const T*; - using pointer = T*; - using const_pointer = const T*; - using vector_storage = T*; - using const_vector_storage = T*; - - generic_vector_storage( std::size_t __dimensions ): - _dimensions( __dimensions ) { - if( __dimensions == 0 ) { - throw std::invalid_argument("dimensions cannot be 0"); - } - this->_storage = new T[ __dimensions ]; - } - - generic_vector_storage() = delete; - - generic_vector_storage( const generic_vector_storage< T >& o ): - _dimensions( o._dimensions ), _storage( new T[ o._dimensions ] ) { - std::copy_n( o._storage, o._dimensions, this->_storage ); - } - - generic_vector_storage( generic_vector_storage< T >&& o ) = 
delete; - - generic_vector_storage< T >& operator=( const generic_vector_storage< T > &original ) { - if( original._dimensions != this->_dimensions ) { - this->clean(); - this->_storage = new T[ original._dimensions]; - } - this->_dimensions = original._dimensions; - std::copy_n( original._storage, original._dimensions, this->_storage ); - return *this; - } - - generic_vector_storage< T >& operator=( generic_vector_storage< T > &&original ) = delete; - - ~generic_vector_storage() { - this->clean(); - } - - std::size_t dimensions() const { - return this->_dimensions; - } - - inline iterator begin() { - return this->_storage; - } - - inline iterator end() { - return this->_storage + this->_dimensions; - } - - inline const_iterator begin() const { - return this->_storage; - } - - inline const_iterator end() const { - return this->_storage + this->_dimensions; - } - - inline const_iterator cbegin() const { - return this->_storage; - } - - inline const_iterator cend() const { - return this->_storage + this->_dimensions; - } - - inline vector_storage storage() { - return this->_storage; - } - - inline const_vector_storage storage() const { - return this->_storage; - } - - inline reference operator[]( std::size_t pos ) { - return *( this->_storage + pos); - } - - inline const_reference operator[]( std::size_t pos ) const { - return *( this->_storage + pos ); - } - -}; - - } // namespace geometry - } // namespace utils -} // namespace grb - -#endif // _GENERIC_VECTOR_STORAGE_H_ diff --git a/include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp b/include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp new file mode 100644 index 000000000..6eb469f21 --- /dev/null +++ b/include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp @@ -0,0 +1,207 @@ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR +#define _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR + +#include + +#include "linearized_halo_ndim_system.hpp" 
+#include "linearized_ndim_system.hpp" +#include "linearized_ndim_iterator.hpp" +#include "array_vector_storage.hpp" + +namespace grb { + namespace algorithms { + namespace geometry { + + template< + size_t DIMS, + typename CoordType, + typename ValueType, + typename ValueCallable + > + struct HaloMatrixGeneratorIterator { + + static_assert( std::is_copy_constructible< ValueCallable >::value, + "ValueCallable must be copy-constructible" ); + + using RowIndexType = CoordType; ///< numeric type of rows + using ColumnIndexType = CoordType; + + using LinearSystemType = grb::utils::geometry::LinearizedHaloNDimSystem< RowIndexType, DIMS >; + using SelfType = HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, ValueCallable >; + using Iterator = typename LinearSystemType::Iterator; + + struct HaloPoint { + + friend SelfType; + + HaloPoint( + const ValueCallable &value_producer, + RowIndexType i, + ColumnIndexType j + ) noexcept : + _value_producer( value_producer ), + _i( i ), + _j( j ) + {} + + HaloPoint( const HaloPoint & ) = default; + + HaloPoint & operator=( const HaloPoint & ) = default; + + inline RowIndexType i() const { return _i; } + inline ColumnIndexType j() const { return _j; } + inline ValueType v() const { + return _value_producer( _i, _j); + } + + private: + // ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal + // ValueType non_diagonal_value; ///< value to emit outside of the diagonal + ValueCallable _value_producer; + RowIndexType _i; + ColumnIndexType _j; + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = HaloPoint; + using pointer = value_type; + using reference = value_type; + using difference_type = typename Iterator::difference_type; + + /** + * @brief Construct a new \c HaloMatrixGeneratorIterator object, setting the current row as \p row + * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag 
otherwise. + * + * @param sizes array with the sizes along the dimensions + * @param _halo halo of points to iterate around; must be > 0 + * @param diag value to emit when on the diagonal + * @param non_diag value to emit outside the diagonal + */ + HaloMatrixGeneratorIterator( + const LinearSystemType &system, + const ValueCallable &value_producer + ) noexcept : + _val( value_producer, 0, 0 ), + _lin_system( &system ), + _sys_iter( system.begin() ) + { + update_coords(); + } + + HaloMatrixGeneratorIterator( const SelfType & ) = default; + + // HaloMatrixGeneratorIterator( SelfType && ) = default; + + SelfType & operator=( const SelfType & ) = default; + + // SelfType & operator=( SelfType && ) = default; + + /** + * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. + * + * This operator internally increments the columns coordinates until wrap-around, when it increments + * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate + * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo. + * + * @return HaloMatrixGeneratorIterator& \c this object, with the updated state + */ + SelfType & operator++() noexcept { + (void) ++_sys_iter; + update_coords(); + return *this; + } + + SelfType & operator+=( size_t offset ) { + _sys_iter += offset; + update_coords(); + return *this; + } + + difference_type operator-( const SelfType &other ) const { + return this->_sys_iter - other._sys_iter; + } + + /** + * @brief Operator to compare \c this against \p o and return whether they differ. 
+ * + * @param o object to compare \c this against + * @return true of the row or the column is different between \p o and \c this + * @return false if both row and column of \p o and \c this are equal + */ + bool operator!=( const SelfType &o ) const { + return this->_sys_iter != o._sys_iter; + } + + /** + * @brief Operator to compare \c this against \p o and return whether they are equal. + * + * @param o object to compare \c this against + * @return true of the row or the column is different between \p o and \c this + * @return false if both row and column of \p o and \c this are equal + */ + bool operator==( const SelfType &o ) const { + return ! operator!=( o ); + } + + /** + * @brief Operator returning the triple to directly access row, column and element values. + * + * Useful when building the matrix by copying the triple of coordinates and value, + * like for the BSP1D backend. + */ + reference operator*() const { + return _val; + } + + pointer operator->() const { + return &_val; + } + + /** + * @brief Returns current row. + */ + inline RowIndexType i() const { + return _val.i(); + } + + /** + * @brief Returns current column. + */ + inline ColumnIndexType j() const { + return _val.j(); + } + + /** + * @brief Returns the current matrix value. + * + * @return ValueType #diagonal_value if \code row == column \endcode (i.e. 
if \code this-> \endcode + * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise + */ + inline ValueType v() const { + return _val.v(); + } + + const Iterator & it() const { + return this->_sys_iter; + } + + private: + value_type _val; + const LinearSystemType *_lin_system; + Iterator _sys_iter; + + void update_coords() { + _val._i = _sys_iter->get_element_linear(); + _val._j = _sys_iter->get_neighbor_linear(); + } + }; + + + + } // namespace geometry + } // namespace utils +} // namespace grb + +#endif // _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp index 4d7fd62ce..04928ac09 100644 --- a/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp @@ -1,6 +1,6 @@ -#ifndef _LINEARIZED_HALO_NDIM_GEOMETRY_H_ -#define _LINEARIZED_HALO_NDIM_GEOMETRY_H_ +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY +#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY #include #include @@ -8,225 +8,226 @@ #include #include #include +#include -#include "linearized_ndim_system.hpp" #include "array_vector_storage.hpp" -#include "generic_vector_storage.hpp" +#include "dynamic_vector_storage.hpp" +#include "linearized_ndim_system.hpp" #include "ndim_vector.hpp" namespace grb { namespace utils { namespace geometry { -template< typename CoordT, std::size_t DIMS > void __compute_neighbors_range( - const array_vector_storage< CoordT, DIMS >& _system_sizes, - const CoordT halo, - const array_vector_storage< CoordT, DIMS >& system_coordinates, - array_vector_storage< CoordT, DIMS >& neighbors_start, - array_vector_storage< CoordT, DIMS >& neighbors_range ) { - - for( CoordT i{0}; i < DIMS/* - 1*/; i++ ) { - const CoordT start{ system_coordinates[i] <= halo ? 
0 : system_coordinates[i] - halo }; - const CoordT end{ std::min( system_coordinates[i] + halo, _system_sizes[i] - 1 ) }; - neighbors_start[i] = start; - neighbors_range[i] = end - start + 1; - } - /* - const std::size_t last{ DIMS - 1 }; - const CoordT start{ system_coordinates[ last ] <= halo ? 0 : system_coordinates[ last ] - halo }; - const CoordT end{ system_coordinates[ last ] + halo }; // can extend beyond actual DIMS-dimensional space - neighbors_start[ last ] = start; - neighbors_range[ last ] = end - start + 1; - */ -} - - - - - - -template< typename CoordT, std::size_t DIMS > std::size_t __neighbour_to_system_coords( - const std::array< CoordT, DIMS > & sizes, - std::size_t system_size, - const std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > > & dimension_neighbors, - CoordT halo, - CoordT neighbor, - array_vector_storage< CoordT, DIMS > & result) { - - if( neighbor > system_size ) { - throw std::invalid_argument("neighbor number ( " + std::to_string(neighbor) - + " ) >= system size ( " + std::to_string( system_size ) + " )"); - } - - array_vector_storage< CoordT, DIMS > halo_coords( DIMS ); -#ifdef DBG - std::size_t * const halo_coords_end{ halo_coords.data() + DIMS }; -#endif - std::fill_n( halo_coords.begin(), DIMS, 0 ); - - for( std::size_t _dim{DIMS}; _dim > 0; _dim--) { - - const std::size_t dimension{_dim - 1}; - const std::size_t dimension_size{ sizes[dimension] }; - const ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > & neighbors{ dimension_neighbors[dimension] }; - - CoordT * const halo_coords_begin{ halo_coords.data() + dimension }; - -#ifdef DBG - std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " << neighbor << std::endl; - std::cout << "\thalo : "; - print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; -#endif - - std::size_t h{0}; - std::size_t previous_neighs{ 0 }; - *halo_coords_begin = h; - std::size_t halo_max_neighs{ neighbors.at( halo_coords_begin 
) }; - //std::cout << "\tinitial halo_max_neighs " << halo_max_neighs << std::endl; - while( h < halo && neighbor >= previous_neighs + halo_max_neighs ) { - h++; - *halo_coords_begin = h; - previous_neighs += halo_max_neighs; - halo_max_neighs = neighbors.at( halo_coords_begin ); - } -#ifdef DBG - std::cout << "- initial halo - neighbour " << neighbor << std::endl; - std::cout << "\th " << h << std::endl; - std::cout << "\thalo : "; - print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; -#endif - - - if ( h < halo ){ - result[dimension] = h; - neighbor -= previous_neighs; -#ifdef DBG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - continue; - } - // saturation occurred - const std::size_t distance_from_halo{ ( neighbor - previous_neighs ) / halo_max_neighs }; -#ifdef DBG - std::cout << "- before middle elements - neighbour " << neighbor << std::endl; - std::cout << "\tprevious_neighs " << previous_neighs << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; - std::cout << "\tdistance_from_halo " << distance_from_halo << std::endl; - std::cout << "\tdimension_size " << dimension_size << std::endl; -#endif - if ( distance_from_halo < dimension_size - 2 * halo ) { - result[dimension] = distance_from_halo + halo; - neighbor -= (previous_neighs + distance_from_halo * halo_max_neighs) ; -#ifdef DBG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - continue; - } - previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; -#ifdef DBG - std::cout << "- after middle elements -neighbour " << neighbor << std::endl; - std::cout << "\tprevious_neighs " << previous_neighs << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; -#endif - - h = halo - 1; - *halo_coords_begin = h; - halo_max_neighs = neighbors.at( halo_coords_begin ); - while( h > 0 && neighbor >= previous_neighs + halo_max_neighs ) 
{ - h--; - *halo_coords_begin = h; - previous_neighs += halo_max_neighs; - halo_max_neighs = neighbors.at( halo_coords_begin ); - } - neighbor -= previous_neighs; -#ifdef DBG - std::cout << "- final halo - neighbour " << neighbor << std::endl; - std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; -#endif - // ( dimension_size - 1 ) because coordinates are 0-based and neighbor - // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] - result[dimension] = dimension_size - 1 - h; -#ifdef DBG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - } - - return neighbor; -} - - -template< typename CoordT > std::size_t __accumulate_dimension_neighbours( - const ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >& prev_neighs, - CoordT* coords_buffer, - std::size_t halo, - std::size_t local_size ) { - std::size_t neighs{0}; - std::size_t h{0}; - for( ; h < halo && local_size > 1; h++ ) { - *coords_buffer = h; - - const std::size_t local_neighs{ prev_neighs.at( coords_buffer ) }; - neighs += 2 * local_neighs; // the 2 sides - local_size -= 2; - } - *coords_buffer = h; - neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements - return neighs; -} - -template< typename CoordT > void __populate_halo_neighbors( std::size_t halo, - ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >& container ) { - - using it_type = typename ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >::domain_iterator; - it_type end{ container.domain_end() }; - for( it_type it{ container.domain_begin() }; it != end; ++it ) { - std::size_t res{1}; - for( std::size_t h: it->get_position() ) res *= (h + 1 + halo); - container.at( it->get_position() ) = res; - } -} - -template< typename CoordT, std::size_t DIMS > std::size_t __init_halo_search( - typename linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > >::const_vector_reference sizes, - std::size_t halo, - 
std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > >& dimension_limits ) { - - using nd_vec = ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > >; - using nd_vec_iterator = typename nd_vec::domain_iterator; - - std::vector halo_sizes( DIMS, halo + 1); - dimension_limits.emplace_back(halo_sizes); - - // initialize values - __populate_halo_neighbors< CoordT >( halo, dimension_limits[0] ); - for( std::size_t i{1}; i < DIMS; i++ ) { - std::vector halos( DIMS - i, halo + 1 ); - dimension_limits.emplace_back(halos); - } - - std::array< CoordT, DIMS > prev_coords_buffer; // store at most DIMS values - CoordT* const prev_coords{ prev_coords_buffer.data() }; - CoordT* const second{ prev_coords + 1 }; // store previous coordinates from second position - for( std::size_t dimension{1}; dimension < DIMS; dimension++ ) { - const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; - nd_vec& current_neighs{dimension_limits[dimension]}; - - nd_vec_iterator end{ current_neighs.domain_end() }; - for( nd_vec_iterator it{ current_neighs.domain_begin() }; it != end; ++it ) { - typename nd_vec::const_domain_vector_reference current_halo_coords{ it->get_position() }; - - std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); - std::size_t local_size{ sizes[dimension - 1] }; - const std::size_t neighs{ __accumulate_dimension_neighbours(prev_neighs, prev_coords, halo, local_size) }; - current_neighs.at(current_halo_coords) = neighs; - } - } - return __accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); -} + template< typename CoordType, size_t DIMS > void __compute_neighbors_range( + const ArrayVectorStorage< CoordType, DIMS >& _system_sizes, + const CoordType halo, + const ArrayVectorStorage< CoordType, DIMS >& system_coordinates, + ArrayVectorStorage< CoordType, DIMS >& neighbors_start, + ArrayVectorStorage< CoordType, DIMS >& neighbors_range ) { + + for( CoordType i{0}; i < DIMS/* - 
1*/; i++ ) { + const CoordType start{ system_coordinates[i] <= halo ? 0 : system_coordinates[i] - halo }; + const CoordType end{ std::min( system_coordinates[i] + halo, _system_sizes[i] - 1 ) }; + neighbors_start[i] = start; + neighbors_range[i] = end - start + 1; + } + /* + const size_t last{ DIMS - 1 }; + const CoordT start{ system_coordinates[ last ] <= halo ? 0 : system_coordinates[ last ] - halo }; + const CoordT end{ system_coordinates[ last ] + halo }; // can extend beyond actual DIMS-dimensional space + neighbors_start[ last ] = start; + neighbors_range[ last ] = end - start + 1; + */ + } + + + + + + + template< typename CoordType, size_t DIMS > size_t __neighbour_to_system_coords( + const std::array< CoordType, DIMS > & sizes, + size_t system_size, + const std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > > & dimension_neighbors, + CoordType halo, + CoordType neighbor, + ArrayVectorStorage< CoordType, DIMS > & result) { + + if( neighbor > system_size ) { + throw std::invalid_argument("neighbor number ( " + std::to_string(neighbor) + + " ) >= system size ( " + std::to_string( system_size ) + " )"); + } + + ArrayVectorStorage< CoordType, DIMS > halo_coords( DIMS ); + #ifdef DBG + size_t * const halo_coords_end{ halo_coords.data() + DIMS }; + #endif + std::fill_n( halo_coords.begin(), DIMS, 0 ); + + for( size_t _dim{DIMS}; _dim > 0; _dim--) { + + const size_t dimension{_dim - 1}; + const size_t dimension_size{ sizes[dimension] }; + const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > & neighbors{ dimension_neighbors[dimension] }; + + CoordType * const halo_coords_begin{ halo_coords.data() + dimension }; + + #ifdef DBG + std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " << neighbor << std::endl; + std::cout << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; + #endif + + size_t h{0}; + size_t previous_neighs{ 0 }; + *halo_coords_begin = h; + 
size_t halo_max_neighs{ neighbors.at( halo_coords_begin ) }; + //std::cout << "\tinitial halo_max_neighs " << halo_max_neighs << std::endl; + while( h < halo && neighbor >= previous_neighs + halo_max_neighs ) { + h++; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } + #ifdef DBG + std::cout << "- initial halo - neighbour " << neighbor << std::endl; + std::cout << "\th " << h << std::endl; + std::cout << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; + #endif + + + if ( h < halo ){ + result[dimension] = h; + neighbor -= previous_neighs; + #ifdef DBG + std::cout << "end neighbour " << neighbor << std::endl; + #endif + continue; + } + // saturation occurred + const size_t distance_from_halo{ ( neighbor - previous_neighs ) / halo_max_neighs }; + #ifdef DBG + std::cout << "- before middle elements - neighbour " << neighbor << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; + std::cout << "\tdistance_from_halo " << distance_from_halo << std::endl; + std::cout << "\tdimension_size " << dimension_size << std::endl; + #endif + if ( distance_from_halo < dimension_size - 2 * halo ) { + result[dimension] = distance_from_halo + halo; + neighbor -= (previous_neighs + distance_from_halo * halo_max_neighs) ; + #ifdef DBG + std::cout << "end neighbour " << neighbor << std::endl; + #endif + continue; + } + previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; + #ifdef DBG + std::cout << "- after middle elements -neighbour " << neighbor << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; + #endif + + h = halo - 1; + *halo_coords_begin = h; + halo_max_neighs = neighbors.at( halo_coords_begin ); + 
while( h > 0 && neighbor >= previous_neighs + halo_max_neighs ) { + h--; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } + neighbor -= previous_neighs; + #ifdef DBG + std::cout << "- final halo - neighbour " << neighbor << std::endl; + std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; + #endif + // ( dimension_size - 1 ) because coordinates are 0-based and neighbor + // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] + result[dimension] = dimension_size - 1 - h; + #ifdef DBG + std::cout << "end neighbour " << neighbor << std::endl; + #endif + } + + return neighbor; + } + + + template< typename CoordType > size_t __accumulate_dimension_neighbours( + const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >& prev_neighs, + CoordType* coords_buffer, + size_t halo, + size_t local_size ) { + size_t neighs{0}; + size_t h{0}; + for( ; h < halo && local_size > 1; h++ ) { + *coords_buffer = h; + + const size_t local_neighs{ prev_neighs.at( coords_buffer ) }; + neighs += 2 * local_neighs; // the 2 sides + local_size -= 2; + } + *coords_buffer = h; + neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements + return neighs; + } + + template< typename CoordType > void __populate_halo_neighbors( size_t halo, + NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >& container ) { + + using it_type = typename NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >::DomainIterator; + it_type end{ container.domain_end() }; + for( it_type it{ container.domain_begin() }; it != end; ++it ) { + size_t res{1}; + for( size_t h: it->get_position() ) res *= (h + 1 + halo); + container.at( it->get_position() ) = res; + } + } + + template< typename CoordType, size_t DIMS > size_t __init_halo_search( + typename LinearizedNDimSystem< CoordType, ArrayVectorStorage< CoordType, DIMS > 
>::ConstVectorReference sizes, + size_t halo, + std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > >& dimension_limits ) { + + using nd_vec = NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >; + using nd_vec_iterator = typename nd_vec::DomainIterator; + + std::vector halo_sizes( DIMS, halo + 1); + dimension_limits.emplace_back(halo_sizes); + + // initialize values + __populate_halo_neighbors< CoordType >( halo, dimension_limits[0] ); + for( size_t i{1}; i < DIMS; i++ ) { + std::vector halos( DIMS - i, halo + 1 ); + dimension_limits.emplace_back(halos); + } + + std::array< CoordType, DIMS > prev_coords_buffer; // store at most DIMS values + CoordType* const prev_coords{ prev_coords_buffer.data() }; + CoordType* const second{ prev_coords + 1 }; // store previous coordinates from second position + for( size_t dimension{1}; dimension < DIMS; dimension++ ) { + const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; + nd_vec& current_neighs{dimension_limits[dimension]}; + + nd_vec_iterator end{ current_neighs.domain_end() }; + for( nd_vec_iterator it{ current_neighs.domain_begin() }; it != end; ++it ) { + typename nd_vec::ConstDomainVectorReference current_halo_coords{ it->get_position() }; + + std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); + size_t local_size{ sizes[dimension - 1] }; + const size_t neighs{ __accumulate_dimension_neighbours(prev_neighs, prev_coords, halo, local_size) }; + current_neighs.at(current_halo_coords) = neighs; + } + } + return __accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); + } } // namespace geometry } // namespace utils } // namespace grb -#endif // _LINEARIZED_HALO_NDIM_GEOMETRY_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp index 
ede3af52c..9829fdb46 100644 --- a/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp @@ -1,12 +1,29 @@ -#ifndef _LINEARIZED_HALO_NDIM_ITERATOR_H_ -#define _LINEARIZED_HALO_NDIM_ITERATOR_H_ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR #include #include #include #include #include +#include #include "linearized_ndim_system.hpp" #include "array_vector_storage.hpp" @@ -16,362 +33,353 @@ namespace grb { namespace utils { namespace geometry { -// forward declaration -template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_system; - -template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_iterator { - - using system_t = linearized_halo_ndim_system< CoordT, DIMS >; - using vector_t = array_vector_storage< CoordT, DIMS >; - using vector_iter = linearized_ndim_iterator< CoordT, vector_t >; -public: - - //using vector_t = typename vector_iter::vector_t; - using const_vector_reference = typename vector_iter::const_vector_reference; - - - - struct halo_ndim_point { - private: - - // for linearization - const system_t* _system; - - // for iteration - vector_iter _element_iter; // coordinates iterator - - //vector_t* _element; - //std::size_t 
_coordinates_linear; - vector_t _neighbor; //the actual neighbor - //std::size_t _neighbor_linear; - CoordT _position; - - public: - - friend linearized_halo_ndim_iterator< CoordT, DIMS>; - - halo_ndim_point() = delete; - - halo_ndim_point( const halo_ndim_point& ) = default; - - halo_ndim_point( halo_ndim_point&& ) = delete; - - halo_ndim_point( const system_t& system ) noexcept : - _system( &system ), - _element_iter( system ), - _neighbor( DIMS ), - _position( 0 ) - { - std::fill_n( this->_neighbor.begin(), DIMS, 0 ); - } - - halo_ndim_point& operator=( const halo_ndim_point& ) = default; - - //halo_ndim_point& operator=( halo_ndim_point&& ) = delete; - - const_vector_reference get_element() const { - return this->_element_iter->get_position(); - } - - std::size_t get_element_linear() const { - return this->_system->ndim_to_linear( this->_element_iter->get_position() ); - } - - const_vector_reference get_neighbor() const { - return this->_neighbor; - } - - std::size_t get_neighbor_linear() const { - return this->_system->ndim_to_linear( this->_neighbor ); - } - - CoordT get_position() const { - return this->_position; - } - }; - - - - - - - using const_point_reference = const struct halo_ndim_point&; - using const_point_pointer = const struct halo_ndim_point*; - - // interface for std::random_access_iterator - using iterator_category = std::random_access_iterator_tag; - using value_type = halo_ndim_point; - using pointer = const halo_ndim_point*; - using reference = const halo_ndim_point&; - using difference_type = signed long; - -private: - - halo_ndim_point _point; - linearized_ndim_system< CoordT, vector_t > _neighbors_linearizer; - vector_iter _neighbor_iter; // iterator in the sub-space of neighbors (0-based) - vector_t _neighbors_start; - vector_iter _neighbor_end; - - inline void __update_neighbor() { - for( std::size_t i{0}; i < DIMS; i++ ) { - //(this->_point)._neighbor[i] = this->_neighbors_start[i] + (*(this->_neighbor_iter))[i]; - 
this->_point._neighbor[i] = this->_neighbors_start[i] + this->_neighbor_iter->get_position()[i]; - } - } - - /* - void __update_neighbor_linear() { - (this->_point)._neighbor_linear = - this->_system.ndim_to_linear( this->_point._neighbor ); - } - */ - - inline void on_neighbor_iter_update() { - this->__update_neighbor(); - //this->__update_neighbor_linear(); - } - - /* - void __update_coordinates_linear() { - (this->_point)._coordinates_linear = - this->_system.ndim_to_linear( *this->_element_iter ); - } - */ - - void on_element_update() { - //this->__update_coordinates_linear(); - // reset everything - vector_t neighbors_range( DIMS ); - this->_point._system->compute_neighbors_range( - //*(this->_point._element_iter), - this->_point._element_iter->get_position(), - this->_neighbors_start, - neighbors_range - ); - /* - std::cout << "\t=== start "; - print( this->_neighbors_start ) << " range "; - print( neighbors_range ) << std::endl; - */ - // re-target _neighbors_linearizer - this->_neighbors_linearizer.retarget( neighbors_range ); - } - - void on_element_advance() { - this->on_element_update(); - - this->_neighbor_iter = vector_iter( this->_neighbors_linearizer ); - this->_neighbor_end = vector_iter::make_system_end_iterator( this->_neighbors_linearizer ); - - this->on_neighbor_iter_update(); - } - -public: - - linearized_halo_ndim_iterator() = delete; - - linearized_halo_ndim_iterator( const system_t& system ) noexcept : - _point( system ), - _neighbors_linearizer( DIMS, system.halo() + 1 ), - _neighbor_iter( this->_neighbors_linearizer ), - _neighbors_start( DIMS ), - _neighbor_end( vector_iter::make_system_end_iterator( this->_neighbors_linearizer ) ) - { - std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); - } - - - /* - linearized_halo_ndim_iterator( const linearized_halo_ndim_iterator< CoordT, DIMS >& original ) noexcept: - _coordinates_linearizer( original._coordinates_linearizer ), - _halo( original._halo ), - _dimension_limits( 
original._dimension_limits ), - _neighbors_linearizer( original._neighbors_linearizer ), - _element_iter( original._element_iter ), - _neighbor_iter( original._neighbor_iter ), - _neighbor_end( original._neighbor_end ), - _neighbors_start( original._neighbors_start ), - _point( original._point ) {} - */ - - linearized_halo_ndim_iterator( const linearized_halo_ndim_iterator< CoordT, DIMS >& ) = default; - - //linearized_halo_ndim_iterator( linearized_halo_ndim_iterator< CoordT, DIMS >&& original ) = delete; - - /* - linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( - const linearized_halo_ndim_iterator< CoordT, DIMS >& original ) noexcept { - this->_coordinates_linearizer = original._coordinates_linearizer; - this->_halo = original._halo; - this->_dimension_limits = original._dimension_limits; - this->_neighbors_linearizer = original._neighbors_linearizer; - this->_element_iter = original._element_iter; - this->_coordinates_linear = original._coordinates_linear; - this->_neighbor_iter = original._neighbor_iter; - this->_neighbor_end = original._neighbor_end; - this->_neighbor = original._neighbor; - this->_neighbors_start = original._neighbors_start; - this->_neighbor_linear = original._neighbor_linear; - } - */ - - linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( const linearized_halo_ndim_iterator< CoordT, DIMS >& ) = default; - - //linearized_halo_ndim_iterator< CoordT, DIMS >& operator=( linearized_halo_ndim_iterator< CoordT, DIMS >&& ) = delete; - - bool operator!=( const linearized_halo_ndim_iterator< CoordT, DIMS >& other ) const { - //return (this->_point)._coordinates_linear != (other._point)._coordinates_linear - // || (this->_point)._neighbor_linear != (other._point)._neighbor_linear; - return this->_point._position != other._point._position; // use linear coordinate - } - - const_point_reference operator*() const { - return this->_point; - } - - const_point_pointer operator->() const { - return &(this->_point); - } - - bool 
has_more_neighbours() const { - return this->_neighbor_iter != this->_neighbor_end; - } - - void next_neighbour() { - /* - std::cout << "sizes: " << this->_neighbors_linearizer.get_sizes() - << " offset " << this->_neighbor_iter->get_position() << " -> " - << this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ) - << std::endl; - */ - ++(this->_neighbor_iter); - this->on_neighbor_iter_update(); - this->_point._position++; - } - - bool has_more_elements() const { - return this->_point.get_element_linear() != (this->_point._system)->base_system_size(); - } - - void next_element() { - std::size_t num_neighbours = this->_neighbors_linearizer.system_size(); - std::size_t neighbour_position_offset = - this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ); - // std::cout << " num_neighbours " << num_neighbours << " offset " << neighbour_position_offset << std::endl; - ++(this->_point._element_iter); - this->on_element_advance(); - // this->_point._position++; - this->_point._position -= neighbour_position_offset; - this->_point._position += num_neighbours; - } - - linearized_halo_ndim_iterator< CoordT, DIMS >& operator++() noexcept { - ++(this->_neighbor_iter); - if( !has_more_neighbours() ) { - ++(this->_point._element_iter); - //this->_coordinates_linear = this->_coordinates_linearizer.ndim_to_linear( this->_element_iter ); - this->on_element_advance(); - - } else { - this->on_neighbor_iter_update(); - } - this->_point._position++; - return *this; - } - - - - linearized_halo_ndim_iterator< CoordT, DIMS >& operator+=( std::size_t offset ) { - if( offset == 1UL ) { - return this->operator++(); - } - const std::size_t final_position { this->_point._position + offset }; - if( final_position > this->_point._system->halo_system_size() ) { - throw std::range_error( "neighbor linear value beyond system" ); - } - vector_t final_element( DIMS ); - std::size_t neighbor_index{ 
(this->_point._system->neighbour_linear_to_element( final_position, final_element )) }; - - // std::cout << "\t=== element " << offset << " -- "; - // std::cout << final_element[0] << " " << final_element[0] << std::endl; - - this->_point._element_iter = vector_iter( *this->_point._system, final_element.cbegin() ); - //this->_point._element = &( *this->_element_iter ); - this->_point._position = final_position; - - this->on_element_update(); - this->_neighbors_linearizer.linear_to_ndim( neighbor_index, final_element ); - - this->_neighbor_iter = vector_iter( this->_neighbors_linearizer, final_element.cbegin() ); - this->_neighbor_end = vector_iter::make_system_end_iterator( this->_neighbors_linearizer ); - this->on_neighbor_iter_update(); - - return *this; - } - - difference_type operator-( const linearized_halo_ndim_iterator< CoordT, DIMS >& other ) const { - /* - if( _point.get_position() < a_point.get_position() ) { - throw std::invalid_argument( "first iterator is in a lower position than second" ); - } - */ - std::size_t a_pos{ _point.get_position() }, b_pos{ other._point.get_position() }; - // std::cout << "diff " << a_pos << " - " << b_pos << std::endl; - std::size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; - using diff_t = typename linearized_halo_ndim_iterator< CoordT, DIMS >::difference_type; - - if( highest - lowest > static_cast< std::size_t >( - std::numeric_limits< diff_t >::max() ) ) { - throw std::invalid_argument( "iterators are too distant" ); - } - - return ( static_cast< diff_t >( a_pos - b_pos ) ); - } - - - - - // implementation depending on logic in operator++ - static linearized_halo_ndim_iterator< CoordT, DIMS > make_system_end_iterator( - const system_t& system - ) { - linearized_halo_ndim_iterator< CoordT, DIMS > result( system ); - - /* - std::cout << "result 0: element "; - print(result->get_element()) << " neighbor "; - print(result->get_neighbor()) << std::endl; - */ - - // go to the very first point 
outside of space - result._point._element_iter = vector_iter::make_system_end_iterator( system ); - /* - std::cout << "result 1: element "; - print(result->get_element()) << " neighbor "; - print(result->get_neighbor()) << std::endl; - */ - - result.on_element_advance(); - result._point._position = system.halo_system_size(); - //std::cout << "got sys size " << system.halo_system_size() << std::endl; - - return result; - } - -}; - -/* -template< typename CoordT, std::size_t DIMS > linearized_halo_ndim_iterator< CoordT, DIMS > - operator+( const linearized_halo_ndim_iterator< CoordT, DIMS >& original, std::size_t increment ) { - linearized_halo_ndim_iterator< CoordT, DIMS > res( original ); - return ( res += increment ); -} -*/ + // forward declaration + template< + typename SizeType, + size_t DIMS + > class LinearizedHaloNDimSystem; + + template< + typename SizeType, + size_t DIMS + > class LinearizedHaloNDimIterator { + + using SystemType = LinearizedHaloNDimSystem< SizeType, DIMS >; + using VectorType = ArrayVectorStorage< SizeType, DIMS >; + using VectorIteratorType = LinearizedNDimIterator< SizeType, VectorType >; + + public: + //using VectorType = typename VectorIteratorType::VectorType; + using ConstVectorReference = typename VectorIteratorType::ConstVectorReference; + using SelfType = LinearizedHaloNDimIterator< SizeType, DIMS >; + + struct HaloNDimElement { + private: + + // for linearization + const SystemType* _system; + + // for iteration + VectorIteratorType _element_iter; // coordinates iterator + + //VectorType* _element; + //size_t _coordinates_linear; + VectorType _neighbor; //the current neighbor + //size_t _neighbor_linear; + SizeType _position; + + public: + friend SelfType; + + HaloNDimElement() = delete; + + HaloNDimElement( const HaloNDimElement& ) = default; + + HaloNDimElement( HaloNDimElement&& ) = delete; + + HaloNDimElement( const SystemType& system ) noexcept : + _system( &system ), + _element_iter( system ), + _neighbor( DIMS ), + 
_position( 0 ) + { + std::fill_n( this->_neighbor.begin(), DIMS, 0 ); + } + + HaloNDimElement& operator=( const HaloNDimElement& ) = default; + + //HaloNDimElement& operator=( HaloNDimElement&& ) = delete; + + ConstVectorReference get_element() const { + return this->_element_iter->get_position(); + } + + size_t get_element_linear() const { + return this->_system->ndim_to_linear( this->_element_iter->get_position() ); + } + + ConstVectorReference get_neighbor() const { + return this->_neighbor; + } + + size_t get_neighbor_linear() const { + return this->_system->ndim_to_linear( this->_neighbor ); + } + + SizeType get_position() const { + return this->_position; + } + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = HaloNDimElement; + using pointer = const HaloNDimElement*; + using reference = const HaloNDimElement&; + using difference_type = signed long; + + private: + HaloNDimElement _point; + LinearizedNDimSystem< SizeType, VectorType > _neighbors_linearizer; + VectorIteratorType _neighbor_iter; // iterator in the sub-space of neighbors (0-based) + VectorType _neighbors_start; + VectorIteratorType _neighbor_end; + + inline void __update_neighbor() { + for( size_t i{0}; i < DIMS; i++ ) { + //(this->_point)._neighbor[i] = this->_neighbors_start[i] + (*(this->_neighbor_iter))[i]; + this->_point._neighbor[i] = this->_neighbors_start[i] + this->_neighbor_iter->get_position()[i]; + } + } + + /* + void __update_neighbor_linear() { + (this->_point)._neighbor_linear = + this->_system.ndim_to_linear( this->_point._neighbor ); + } + */ + + inline void on_neighbor_iter_update() { + this->__update_neighbor(); + //this->__update_neighbor_linear(); + } + + /* + void __update_coordinates_linear() { + (this->_point)._coordinates_linear = + this->_system.ndim_to_linear( *this->_element_iter ); + } + */ + + void on_element_update() { + //this->__update_coordinates_linear(); + // reset everything + 
VectorType neighbors_range( DIMS ); + this->_point._system->compute_neighbors_range( + //*(this->_point._element_iter), + this->_point._element_iter->get_position(), + this->_neighbors_start, + neighbors_range + ); + /* + std::cout << "\t=== start "; + print( this->_neighbors_start ) << " range "; + print( neighbors_range ) << std::endl; + */ + // re-target _neighbors_linearizer + this->_neighbors_linearizer.retarget( neighbors_range ); + } + + void on_element_advance() { + this->on_element_update(); + + this->_neighbor_iter = VectorIteratorType( this->_neighbors_linearizer ); + this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ); + + this->on_neighbor_iter_update(); + } + + public: + + LinearizedHaloNDimIterator() = delete; + + LinearizedHaloNDimIterator( const SystemType& system ) noexcept : + _point( system ), + _neighbors_linearizer( DIMS, system.halo() + 1 ), + _neighbor_iter( this->_neighbors_linearizer ), + _neighbors_start( DIMS ), + _neighbor_end( VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ) ) + { + std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); + } + + + /* + LinearizedHaloNDimIterator( const LinearizedHaloNDimIterator< SizeType, DIMS >& original ) noexcept: + _coordinates_linearizer( original._coordinates_linearizer ), + _halo( original._halo ), + _dimension_limits( original._dimension_limits ), + _neighbors_linearizer( original._neighbors_linearizer ), + _element_iter( original._element_iter ), + _neighbor_iter( original._neighbor_iter ), + _neighbor_end( original._neighbor_end ), + _neighbors_start( original._neighbors_start ), + _point( original._point ) {} + */ + + LinearizedHaloNDimIterator( const SelfType & ) = default; + + //LinearizedHaloNDimIterator( SelfType &&original ) = delete; + + /* + LinearizedHaloNDimIterator< SizeType, DIMS >& operator=( + const LinearizedHaloNDimIterator< SizeType, DIMS >& original ) noexcept { + this->_coordinates_linearizer = 
original._coordinates_linearizer; + this->_halo = original._halo; + this->_dimension_limits = original._dimension_limits; + this->_neighbors_linearizer = original._neighbors_linearizer; + this->_element_iter = original._element_iter; + this->_coordinates_linear = original._coordinates_linear; + this->_neighbor_iter = original._neighbor_iter; + this->_neighbor_end = original._neighbor_end; + this->_neighbor = original._neighbor; + this->_neighbors_start = original._neighbors_start; + this->_neighbor_linear = original._neighbor_linear; + } + */ + + SelfType & operator=( const SelfType & ) = default; + + //SelfType & operator=( SelfType && ) = delete; + + bool operator!=( const SelfType &other ) const { + //return (this->_point)._coordinates_linear != (other._point)._coordinates_linear + // || (this->_point)._neighbor_linear != (other._point)._neighbor_linear; + return this->_point._position != other._point._position; // use linear coordinate + } + + reference operator*() const { + return this->_point; + } + + pointer operator->() const { + return &(this->_point); + } + + bool has_more_neighbours() const { + return this->_neighbor_iter != this->_neighbor_end; + } + + void next_neighbour() { + /* + std::cout << "sizes: " << this->_neighbors_linearizer.get_sizes() + << " offset " << this->_neighbor_iter->get_position() << " -> " + << this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ) + << std::endl; + */ + ++(this->_neighbor_iter); + this->on_neighbor_iter_update(); + this->_point._position++; + } + + bool has_more_elements() const { + return this->_point.get_element_linear() != (this->_point._system)->base_system_size(); + } + + void next_element() { + size_t num_neighbours = this->_neighbors_linearizer.system_size(); + size_t neighbour_position_offset = + this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ); + // std::cout << " num_neighbours " << num_neighbours << " offset " << 
neighbour_position_offset << std::endl; + ++(this->_point._element_iter); + this->on_element_advance(); + // this->_point._position++; + this->_point._position -= neighbour_position_offset; + this->_point._position += num_neighbours; + } + + SelfType & operator++() noexcept { + ++(this->_neighbor_iter); + if( !has_more_neighbours() ) { + ++(this->_point._element_iter); + //this->_coordinates_linear = this->_coordinates_linearizer.ndim_to_linear( this->_element_iter ); + this->on_element_advance(); + + } else { + this->on_neighbor_iter_update(); + } + this->_point._position++; + return *this; + } + + SelfType & operator+=( size_t offset ) { + if( offset == 1UL ) { + return this->operator++(); + } + const size_t final_position { this->_point._position + offset }; + if( final_position > this->_point._system->halo_system_size() ) { + throw std::range_error( "neighbor linear value beyond system" ); + } + VectorType final_element( DIMS ); + size_t neighbor_index{ (this->_point._system->neighbour_linear_to_element( final_position, final_element )) }; + + // std::cout << "\t=== element " << offset << " -- "; + // std::cout << final_element[0] << " " << final_element[0] << std::endl; + + this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); + //this->_point._element = &( *this->_element_iter ); + this->_point._position = final_position; + + this->on_element_update(); + this->_neighbors_linearizer.linear_to_ndim( neighbor_index, final_element ); + + this->_neighbor_iter = VectorIteratorType( this->_neighbors_linearizer, final_element.cbegin() ); + this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ); + this->on_neighbor_iter_update(); + + return *this; + } + + difference_type operator-( const SelfType &other ) const { + /* + if( _point.get_position() < a_point.get_position() ) { + throw std::invalid_argument( "first iterator is in a lower position than second" ); + } + */ + size_t a_pos{ 
_point.get_position() }, b_pos{ other._point.get_position() }; + // std::cout << "diff " << a_pos << " - " << b_pos << std::endl; + size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; + using diff_t = typename LinearizedHaloNDimIterator< SizeType, DIMS >::difference_type; + + if( highest - lowest > static_cast< size_t >( + std::numeric_limits< diff_t >::max() ) ) { + throw std::invalid_argument( "iterators are too distant" ); + } + + return ( static_cast< diff_t >( a_pos - b_pos ) ); + } + + + + + // implementation depending on logic in operator++ + static SelfType make_system_end_iterator( const SystemType& system ) { + SelfType result( system ); + + /* + std::cout << "result 0: element "; + print(result->get_element()) << " neighbor "; + print(result->get_neighbor()) << std::endl; + */ + + // go to the very first point outside of space + result._point._element_iter = VectorIteratorType::make_system_end_iterator( system ); + /* + std::cout << "result 1: element "; + print(result->get_element()) << " neighbor "; + print(result->get_neighbor()) << std::endl; + */ + + result.on_element_advance(); + result._point._position = system.halo_system_size(); + //std::cout << "got sys size " << system.halo_system_size() << std::endl; + + return result; + } + + }; + + /* + template< typename SizeType, size_t DIMS > LinearizedHaloNDimIterator< SizeType, DIMS > + operator+( const LinearizedHaloNDimIterator< SizeType, DIMS >& original, size_t increment ) { + LinearizedHaloNDimIterator< SizeType, DIMS > res( original ); + return ( res += increment ); + } + */ } // namespace geometry } // namespace utils } // namespace grb -#endif // _LINEARIZED_HALO_NDIM_ITERATOR_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp b/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp index f915492ac..af296cc9f 100644 --- 
a/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp @@ -1,11 +1,12 @@ -#ifndef _LINEARIZED_HALO_NDIM_SYSTEM_H_ -#define _LINEARIZED_HALO_NDIM_SYSTEM_H_ +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM #include #include #include #include +#include #include "array_vector_storage.hpp" #include "linearized_ndim_system.hpp" @@ -16,96 +17,100 @@ namespace grb { namespace utils { namespace geometry { -// only with array_vector_storage -template< typename CoordT, std::size_t DIMS > class linearized_halo_ndim_system: - public linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > > { -public: - - using iterator = linearized_halo_ndim_iterator< CoordT, DIMS >; - using const_vector_reference = typename array_vector_storage< CoordT, DIMS >::const_vector_storage; - using self_t = linearized_halo_ndim_system< CoordT, DIMS >; - using base_t = linearized_ndim_system< CoordT, array_vector_storage< CoordT, DIMS > >; - - linearized_halo_ndim_system( const_vector_reference sizes, CoordT halo ): - base_t( sizes.cbegin(), sizes.cend() ), - _halo( halo ) { - - for( CoordT __size : sizes ) { - if ( __size < 2 * halo + 1 ) { - throw std::invalid_argument( - std::string( "the halo (" + std::to_string(halo) + - std::string( ") goes beyond a system size (" ) + - std::to_string( __size) + std::string( ")" ) ) ); - } - } - - this->_system_size = __init_halo_search< CoordT, DIMS >( - this->get_sizes(), - _halo, this->_dimension_limits ); - assert( this->_dimension_limits.size() == DIMS ); - } - - linearized_halo_ndim_system() = delete; - - linearized_halo_ndim_system( const self_t & ) = default; - - linearized_halo_ndim_system( self_t && ) = delete; - - ~linearized_halo_ndim_system() noexcept {} - - self_t & operator=( const self_t & ) = default; - - self_t & operator=( self_t && ) = delete; - - iterator begin() const { 
- return iterator( *this ); - } - - iterator end() const { - return iterator::make_system_end_iterator( *this ); - } - - std::size_t halo_system_size() const { - return this->_system_size; - } - - std::size_t base_system_size() const { - return this->base_t::system_size(); - } - - std::size_t halo() const { - return this->_halo; - } - - void compute_neighbors_range( - const array_vector_storage< CoordT, DIMS >& system_coordinates, - array_vector_storage< CoordT, DIMS >& neighbors_start, - array_vector_storage< CoordT, DIMS >& neighbors_range) const noexcept { - __compute_neighbors_range( this->get_sizes(), - this->_halo, - system_coordinates, - neighbors_start, - neighbors_range - ); - } - - std::size_t neighbour_linear_to_element ( - CoordT neighbor, - array_vector_storage< CoordT, DIMS > & result) const noexcept { - return __neighbour_to_system_coords( this->get_sizes(), - this->_system_size, this->_dimension_limits, this->_halo, neighbor, result ); - } - -private: - - const CoordT _halo; - std::vector< ndim_vector< CoordT, CoordT, generic_vector_storage< CoordT > > > _dimension_limits; - std::size_t _system_size; - -}; + // only with ArrayVectorStorage + template< + typename SizeType, + size_t DIMS + > class LinearizedHaloNDimSystem: + public LinearizedNDimSystem< SizeType, ArrayVectorStorage< SizeType, DIMS > > { + public: + + using VectorType = ArrayVectorStorage< SizeType, DIMS >; + using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; + using SelfType = LinearizedHaloNDimSystem< SizeType, DIMS >; + using BaseType = LinearizedNDimSystem< SizeType, VectorType >; + using Iterator = LinearizedHaloNDimIterator< SizeType, DIMS >; + + LinearizedHaloNDimSystem( ConstVectorStorageType sizes, SizeType halo ): + BaseType( sizes.cbegin(), sizes.cend() ), + _halo( halo ) { + + for( SizeType __size : sizes ) { + if ( __size < 2 * halo + 1 ) { + throw std::invalid_argument( + std::string( "the halo (" + std::to_string(halo) + + std::string( ") goes 
beyond a system size (" ) + + std::to_string( __size) + std::string( ")" ) ) ); + } + } + + this->_system_size = __init_halo_search< SizeType, DIMS >( + this->get_sizes(), + _halo, this->_dimension_limits ); + assert( this->_dimension_limits.size() == DIMS ); + } + + LinearizedHaloNDimSystem() = delete; + + LinearizedHaloNDimSystem( const SelfType & ) = default; + + LinearizedHaloNDimSystem( SelfType && ) = delete; + + ~LinearizedHaloNDimSystem() noexcept {} + + SelfType & operator=( const SelfType & ) = default; + + SelfType & operator=( SelfType && ) = delete; + + Iterator begin() const { + return Iterator( *this ); + } + + Iterator end() const { + return Iterator::make_system_end_iterator( *this ); + } + + size_t halo_system_size() const { + return this->_system_size; + } + + size_t base_system_size() const { + return this->BaseType::system_size(); + } + + size_t halo() const { + return this->_halo; + } + + void compute_neighbors_range( + const VectorType &system_coordinates, + VectorType &neighbors_start, + VectorType &neighbors_range) const noexcept { + __compute_neighbors_range( this->get_sizes(), + this->_halo, + system_coordinates, + neighbors_start, + neighbors_range + ); + } + + size_t neighbour_linear_to_element ( + SizeType neighbor, + VectorType &result) const noexcept { + return __neighbour_to_system_coords( this->get_sizes(), + this->_system_size, this->_dimension_limits, this->_halo, neighbor, result ); + } + + private: + + const SizeType _halo; + std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > _dimension_limits; + size_t _system_size; + + }; } // namespace geometry } // namespace utils } // namespace grb -#endif // _LINEARIZED_HALO_NDIM_SYSTEM_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM diff --git a/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp b/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp index 20a6473cc..60f424164 100644 --- 
a/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp +++ b/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp @@ -1,178 +1,197 @@ -#ifndef _NDIM_ITERATOR_H_ -#define _NDIM_ITERATOR_H_ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR #include #include #include #include #include +#include #include "array_vector_storage.hpp" - namespace grb { namespace utils { namespace geometry { -// forward declaration for default -template< typename T, typename StorageT > class linearized_ndim_system; - -template< typename T, typename StorageT > class linearized_ndim_iterator { -public: - - using storage_t = StorageT; - using lin_t = linearized_ndim_system< T, storage_t >; - using const_vector_reference = const storage_t&; - using self_t = linearized_ndim_iterator< T, StorageT >; - - struct ndim_point { - private: - - const lin_t* system; // pointer because of copy assignment - storage_t coords; - - public: - - friend self_t; - - ndim_point() = delete; - - ndim_point( const ndim_point& ) = default; + // forward declaration for default + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimSystem; - ndim_point( ndim_point&& ) = delete; - - ndim_point( const lin_t& _system ) noexcept : - system( &_system ), - coords( _system.dimensions() ) - { - std::fill_n( 
this->coords.begin(), _system.dimensions(), 0 ); - } - - ndim_point& operator=( const ndim_point& ) = default; - - inline const_vector_reference get_position() const { - return coords; - } - - std::size_t get_linear_position() const { - return system->ndim_to_linear( coords ); - } - }; - - - // interface for std::random_access_iterator - using iterator_category = std::random_access_iterator_tag; - using value_type = ndim_point; - using pointer = const value_type*; - using reference = const value_type&; - using difference_type = signed long; - - linearized_ndim_iterator( const lin_t &_system ) noexcept : - _p( _system ) - {} - - template< typename IterT > linearized_ndim_iterator( const lin_t &_system, IterT begin ) noexcept : - _p( _system ) - { - std::copy_n( begin, _system.dimensions(), this->_p.coords.begin() ); - } - - linearized_ndim_iterator() = delete; - - linearized_ndim_iterator( const self_t& original ): - _p( original._p ) {} - - self_t& operator=( const self_t& original ) = default; - - //linearized_ndim_iterator( self_t&& original ) = delete; - - //self_t operator=( self_t&& ) = delete; - - ~linearized_ndim_iterator() {} - - self_t & operator++() noexcept { - bool rewind{ true }; - // rewind only the first N-1 coordinates - for( std::size_t i { 0 }; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { - T& coord = this->_p.coords[ i ]; - // must rewind dimension if we wrap-around - /* - T new_coord = ( coord + 1 ) % this->_p.system->get_sizes()[ i ]; - rewind = new_coord < coord; - coord = new_coord; - */ - T plus = coord + 1; - rewind = plus >= this->_p.system->get_sizes()[ i ]; - coord = rewind ? 
0 : plus; - } - // if we still have to rewind, increment the last coordinate, which is unbounded - if( rewind ) { - this->_p.coords[ this->_p.system->dimensions() - 1 ]++; - } - return *this; - } - - self_t & operator+=( std::size_t offset ) { - std::size_t linear{ _p.get_linear_position() + offset }; - if( linear > _p.system->system_size() ) { - throw std::invalid_argument("increment is too large"); - } - _p.system->linear_to_ndim( linear, _p.coords ); - return *this; - } - - difference_type operator-( const self_t &other ) const { - std::size_t a_pos{ _p.get_linear_position() }, - b_pos{ other._p.get_linear_position() }; - std::size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; - - if( highest - lowest > static_cast< std::size_t >( - std::numeric_limits< difference_type >::max() ) ) { - throw std::invalid_argument( "iterators are too distant" ); - } - - return ( static_cast< difference_type >( a_pos - b_pos ) ); - } - - reference operator*() const { - return this->_p; - } - - pointer operator->() const { - return &( this->_p ); - } - - bool operator!=( const self_t &o ) const { - const std::size_t dims{ this->_p.system->dimensions() }; - if( dims != o._p.system->dimensions() ) { - throw std::invalid_argument("system sizes do not match"); - } - bool equal{ true }; - for( std::size_t i{0}; i < dims && equal; i++) { - equal &= ( this->_p.coords[i] == o._p.coords[i] ); - } - return !equal; - } - - // implementation depending on logic in operator++ - static self_t - make_system_end_iterator( const lin_t &_system ) { - // fill with 0s - self_t iter( _system ); - std::size_t last{ iter->system->dimensions() - 1 }; - // store last size in last position - iter._p.coords[ last ] = iter->system->get_sizes()[ last ]; - return iter; - } - -private: - ndim_point _p; - -}; + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimIterator { + public: + + using VectorType = InternalVectorType; + using LinNDimSysType = 
LinearizedNDimSystem< SizeType, VectorType >; + using ConstVectorReference = const VectorType&; + using SelfType = LinearizedNDimIterator< SizeType, InternalVectorType >; + + struct NDimPoint { + private: + + const LinNDimSysType* system; // pointer because of copy assignment + VectorType coords; + + public: + + friend SelfType; + + NDimPoint() = delete; + + NDimPoint( const NDimPoint& ) = default; + + NDimPoint( NDimPoint&& ) = delete; + + NDimPoint( const LinNDimSysType& _system ) noexcept : + system( &_system ), + coords( _system.dimensions() ) + { + std::fill_n( this->coords.begin(), _system.dimensions(), 0 ); + } + + NDimPoint& operator=( const NDimPoint& ) = default; + + inline ConstVectorReference get_position() const { + return coords; + } + + size_t get_linear_position() const { + return system->ndim_to_linear( coords ); + } + }; + + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = NDimPoint; + using pointer = const value_type*; + using reference = const value_type&; + using difference_type = signed long; + + LinearizedNDimIterator( const LinNDimSysType &_system ) noexcept : + _p( _system ) + {} + + template< typename IterT > LinearizedNDimIterator( const LinNDimSysType &_system, IterT begin ) noexcept : + _p( _system ) + { + std::copy_n( begin, _system.dimensions(), this->_p.coords.begin() ); + } + + LinearizedNDimIterator() = delete; + + LinearizedNDimIterator( const SelfType &original ): + _p( original._p ) {} + + SelfType& operator=( const SelfType &original ) = default; + + // LinearizedNDimIterator( SelfType && ) = delete; + + // SelfType operator=( SelfType && ) = delete; + + ~LinearizedNDimIterator() {} + + SelfType & operator++() noexcept { + bool rewind{ true }; + // rewind only the first N-1 coordinates + for( size_t i { 0 }; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { + SizeType& coord = this->_p.coords[ i ]; + // must rewind dimension if we wrap-around 
+ /* + SizeType new_coord = ( coord + 1 ) % this->_p.system->get_sizes()[ i ]; + rewind = new_coord < coord; + coord = new_coord; + */ + SizeType plus = coord + 1; + rewind = plus >= this->_p.system->get_sizes()[ i ]; + coord = rewind ? 0 : plus; + } + // if we still have to rewind, increment the last coordinate, which is unbounded + if( rewind ) { + this->_p.coords[ this->_p.system->dimensions() - 1 ]++; + } + return *this; + } + + SelfType & operator+=( size_t offset ) { + size_t linear{ _p.get_linear_position() + offset }; + if( linear > _p.system->system_size() ) { + throw std::invalid_argument("increment is too large"); + } + _p.system->linear_to_ndim( linear, _p.coords ); + return *this; + } + + difference_type operator-( const SelfType &other ) const { + size_t a_pos{ _p.get_linear_position() }, + b_pos{ other._p.get_linear_position() }; + size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; + if( highest - lowest > static_cast< size_t >( + std::numeric_limits< difference_type >::max() ) ) { + throw std::invalid_argument( "iterators are too distant" ); + } + return ( static_cast< difference_type >( a_pos - b_pos ) ); + } + + reference operator*() const { + return this->_p; + } + + pointer operator->() const { + return &( this->_p ); + } + + bool operator!=( const SelfType &o ) const { + const size_t dims{ this->_p.system->dimensions() }; + if( dims != o._p.system->dimensions() ) { + throw std::invalid_argument("system sizes do not match"); + } + bool equal{ true }; + for( size_t i{0}; i < dims && equal; i++) { + equal &= ( this->_p.coords[i] == o._p.coords[i] ); + } + return !equal; + } + + // implementation depending on logic in operator++ + static SelfType make_system_end_iterator( const LinNDimSysType &_system ) { + // fill with 0s + SelfType iter( _system ); + size_t last{ iter->system->dimensions() - 1 }; + // store last size in last position + iter._p.coords[ last ] = iter->system->get_sizes()[ last ]; + return iter; + } + + 
private: + NDimPoint _p; + + }; } // namespace geometry } // namespace utils } // namespace grb -#endif // _NDIM_ITERATOR_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_ndim_system.hpp b/include/graphblas/utils/geometry/linearized_ndim_system.hpp index 2916208ed..87352aa19 100644 --- a/include/graphblas/utils/geometry/linearized_ndim_system.hpp +++ b/include/graphblas/utils/geometry/linearized_ndim_system.hpp @@ -1,6 +1,22 @@ -#ifndef _NDIM_SYSTEM_LINEARIZER_H_ -#define _NDIM_SYSTEM_LINEARIZER_H_ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER +#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER #include #include @@ -9,166 +25,192 @@ #include #include #include +#include #include "ndim_system.hpp" #include "linearized_ndim_iterator.hpp" -#include "array_vector_storage.hpp" +// #include "array_vector_storage.hpp" +/** + * @file linearized_ndim_system.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of \p LinearizedNDimSystem.
+ * + * @date 2022-10-24 + */ namespace grb { namespace utils { namespace geometry { -template< typename IterIn, typename IterOut > - std::size_t __compute_offsets( IterIn in_begin, IterIn in_end, IterOut out_begin ) { - std::size_t prod{1}; - for( ; in_begin != in_end; ++in_begin, ++out_begin ) { - *out_begin = prod; - prod *= *in_begin; - } - return prod; -} - -// container for system sizes, doing only ndim <--> linear translation -template< typename T, typename StorageT > class linearized_ndim_system: - public ndim_system< T, StorageT > { -public: - - using base_t = ndim_system< T, StorageT >; - using storage_t = StorageT; - using self_t = linearized_ndim_system< T, StorageT >; - - using vector_reference = typename base_t::vector_reference; - using const_vector_reference = typename base_t::const_vector_reference; - using vector_storage = typename storage_t::vector_storage; - using const_vector_storage = typename storage_t::const_vector_storage; - using iterator = linearized_ndim_iterator< T, storage_t >; - - template< typename IterT > linearized_ndim_system( IterT begin, IterT end) noexcept : - base_t( begin, end ), - offsets( std::distance( begin, end ) ) - { - this->_system_size = __compute_offsets( begin, end, this->offsets.begin() ) ; - } - - linearized_ndim_system() = delete; - - linearized_ndim_system( const self_t &original ) = default; - - - linearized_ndim_system( self_t &&original ) noexcept: - base_t( std::move(original) ), offsets( std::move( original.offsets ) ), - _system_size( original._system_size ) { - original._system_size = 0; - } - - linearized_ndim_system( const std::vector & _sizes ) noexcept : - linearized_ndim_system( _sizes.cbegin(), _sizes.cend() ) {} - - linearized_ndim_system( std::size_t _dimensions, std::size_t max_value ) noexcept : - base_t( _dimensions, max_value ), - offsets( _dimensions ), - _system_size( _dimensions ) - { - T v{1}; - for( std::size_t i{0}; i < _dimensions; i++ ) { - this->offsets[i] = v; - v *= max_value; - } 
- this->_system_size = v; - } - - ~linearized_ndim_system() {} - - self_t& operator=( const self_t & ) = default; - - //linearized_ndim_system& operator=( linearized_ndim_system &&original ) = delete; - - inline std::size_t system_size() const { - return this->_system_size; - } - - inline const_vector_reference get_offsets() const { - return this->offsets; - } - - void linear_to_ndim(std::size_t linear, vector_reference output ) const { - if( linear > this->_system_size ) { - throw std::range_error( "linear value beyond system" ); - } - for( std::size_t _i{ this->offsets.dimensions() }; _i > 0; _i-- ) { - const std::size_t dim{ _i - 1 }; - const std::size_t coord{ linear / this->offsets[dim] }; - output[dim] = coord; - linear -= ( coord * this->offsets[dim] ); - } - assert( linear == 0 ); - } - - std::size_t ndim_to_linear_check( const_vector_reference ndim_vector) const { - return this->ndim_to_linear_check( ndim_vector.storage() ); - } - - std::size_t ndim_to_linear_check( const_vector_storage ndim_vector ) const { - std::size_t linear { 0 }; - for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { - if( ndim_vector[i] >= this->get_sizes()[i] ) { - throw std::invalid_argument( "input vector beyond system sizes" ); - } - } - return ndim_to_linear( ndim_vector ); - } - - std::size_t ndim_to_linear( const_vector_reference ndim_vector) const { - return this->ndim_to_linear( ndim_vector.storage() ); - } - - std::size_t ndim_to_linear( const_vector_storage ndim_vector ) const { - std::size_t linear { 0 }; - for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { - linear += this->offsets[i] * ndim_vector[i]; - } - return linear; - } - - std::size_t ndim_to_linear_offset( const_vector_storage ndim_vector ) const { - std::size_t linear { 0 }; - std::size_t steps{ 1 }; - for( std::size_t i { 0 }; i < this->dimensions(); i++ ) { - linear += steps * ndim_vector[i]; - steps *= this->_sizes[i]; - } - return linear; - } - - // must be same dimensionality - void retarget( 
const_vector_reference _new_sizes ) { - if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { - throw std::invalid_argument("new system must have same dimensions as previous: new " - + std::to_string( _new_sizes.dimensions() ) + ", old " - + std::to_string( this->_sizes.dimensions() ) ); - } - this->_sizes = _new_sizes; // copy - this->_system_size = __compute_offsets( _new_sizes.begin(), _new_sizes.end(), this->offsets.begin() ) ; - } - - iterator begin() const { - return iterator( *this ); - } - - iterator end() const { - return iterator::make_system_end_iterator( *this ); - } - -private: - storage_t offsets; - std::size_t _system_size; - -}; + /** + * Extends a \p NDimSystem by linearizing it, i.e. it provides facilities to map a vector in + * NDimSystem#dimensions() dimensions to a linear value ranging from \a 0 to #system_size() + * and vice versa. Such a linearized representation allows user logic to iterate over the system: + * iterators are indeed available via #begin()/#end(). + * + * Further facilities are methods to map users' vectors from linear to NDimSystem#dimensions()-dimensional + * or vice versa and also to "retarget" the system, i.e. to represent a system of same dimensionality + * but different sizes.
+ * + * @tparam SizeType integral type to store the size of each dimension + * @tparam InternalVectorType internal vector type to store the sizes + */ + template< + typename SizeType, + typename InternalVectorType + > class LinearizedNDimSystem: public NDimSystem< SizeType, InternalVectorType > { + + public: + static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); + + using BaseType = NDimSystem< SizeType, InternalVectorType >; + using SelfType = LinearizedNDimSystem< SizeType, InternalVectorType >; + using VectorType = typename BaseType::VectorType; + + using VectorReference = typename BaseType::VectorReference; + using ConstVectorReference = typename BaseType::ConstVectorReference; + using VectorStorageType = typename VectorType::VectorStorageType; + using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; + using Iterator = LinearizedNDimIterator< SizeType, InternalVectorType >; + + template< typename IterT > LinearizedNDimSystem( IterT begin, IterT end) noexcept : + BaseType( begin, end ), + offsets( std::distance( begin, end ) ) + { + this->_system_size = compute_offsets( begin, end, this->offsets.begin() ) ; + } + + LinearizedNDimSystem( const std::vector< size_t > &_sizes ) noexcept : + LinearizedNDimSystem( _sizes.cbegin(), _sizes.cend() ) {} + + LinearizedNDimSystem( size_t _dimensions, size_t max_value ) noexcept : + BaseType( _dimensions, max_value ), + offsets( _dimensions ), + _system_size( _dimensions ) + { + SizeType v{1}; + for( size_t i{0}; i < _dimensions; i++ ) { + this->offsets[i] = v; + v *= max_value; + } + this->_system_size = v; + } + + LinearizedNDimSystem() = delete; + + LinearizedNDimSystem( const SelfType &original ) = default; + + LinearizedNDimSystem( SelfType &&original ) noexcept: + BaseType( std::move(original) ), offsets( std::move( original.offsets ) ), + _system_size( original._system_size ) { + original._system_size = 0; + } + + ~LinearizedNDimSystem() {} + + SelfType&
operator=( const SelfType & ) = default; + + SelfType& operator=( SelfType &&original ) = delete; + + inline size_t system_size() const { + return this->_system_size; + } + + inline ConstVectorReference get_offsets() const { + return this->offsets; + } + + void linear_to_ndim( size_t linear, VectorReference output ) const { + if( linear > this->_system_size ) { + throw std::range_error( "linear value beyond system" ); + } + for( size_t _i{ this->offsets.dimensions() }; _i > 0; _i-- ) { + const size_t dim{ _i - 1 }; + const size_t coord{ linear / this->offsets[dim] }; + output[dim] = coord; + linear -= ( coord * this->offsets[dim] ); + } + assert( linear == 0 ); + } + + size_t ndim_to_linear_check( ConstVectorReference ndim_vector) const { + return this->ndim_to_linear_check( ndim_vector.storage() ); + } + + size_t ndim_to_linear_check( ConstVectorStorageType ndim_vector ) const { + size_t linear { 0 }; + for( size_t i { 0 }; i < this->dimensions(); i++ ) { + if( ndim_vector[i] >= this->get_sizes()[i] ) { + throw std::invalid_argument( "input vector beyond system sizes" ); + } + } + return ndim_to_linear( ndim_vector ); + } + + size_t ndim_to_linear( ConstVectorReference ndim_vector) const { + return this->ndim_to_linear( ndim_vector.storage() ); + } + + size_t ndim_to_linear( ConstVectorStorageType ndim_vector ) const { + size_t linear { 0 }; + for( size_t i { 0 }; i < this->dimensions(); i++ ) { + linear += this->offsets[i] * ndim_vector[i]; + } + return linear; + } + + // probably same as ndim_to_linear !!! 
+ size_t ndim_to_linear_offset( ConstVectorStorageType ndim_vector ) const { + size_t linear{ 0 }; + size_t steps{ 1 }; + for( size_t i{ 0 }; i < this->dimensions(); i++ ) { + linear += steps * ndim_vector[i]; + steps *= this->_sizes[i]; + } + return linear; + } + + // must be same dimensionality + void retarget( ConstVectorReference _new_sizes ) { + if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { + throw std::invalid_argument("new system must have same dimensions as previous: new " + + std::to_string( _new_sizes.dimensions() ) + ", old " + + std::to_string( this->_sizes.dimensions() ) ); + } + this->_sizes = _new_sizes; // copy + this->_system_size = compute_offsets( _new_sizes.begin(), _new_sizes.end(), this->offsets.begin() ) ; + } + + Iterator begin() const { + return Iterator( *this ); + } + + Iterator end() const { + return Iterator::make_system_end_iterator( *this ); + } + + private: + + VectorType offsets; + size_t _system_size; + + template< + typename IterIn, + typename IterOut + > static size_t compute_offsets( IterIn in_begin, IterIn in_end, IterOut out_begin ) { + size_t prod{1}; + for( ; in_begin != in_end; ++in_begin, ++out_begin ) { + *out_begin = prod; + prod *= *in_begin; + } + return prod; + } + }; } // namespace geometry } // namespace utils } // namespace grb -#endif // _NDIM_SYSTEM_LINEARIZER_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER diff --git a/include/graphblas/utils/geometry/ndim_system.hpp b/include/graphblas/utils/geometry/ndim_system.hpp index 41434f3c4..f9a97c18d 100644 --- a/include/graphblas/utils/geometry/ndim_system.hpp +++ b/include/graphblas/utils/geometry/ndim_system.hpp @@ -1,69 +1,133 @@ -#ifndef _NDIM_SYSTEM_H_ -#define _NDIM_SYSTEM_H_ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM #include #include #include #include +#include +#include #include "array_vector_storage.hpp" +/** + * @file ndim_system.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of \p NDimSystem. + * + * @date 2022-10-24 + */ namespace grb { namespace utils { namespace geometry { -template< typename T, typename StorageT > class ndim_system { - -public: - using storage_t = StorageT; - using vector_reference = storage_t&; - using const_vector_reference = const storage_t&; - using self_t = ndim_system< T, StorageT >; - - template< typename IterT > ndim_system( IterT begin, IterT end) noexcept : - _sizes( std::distance( begin, end ) ) - { - std::copy( begin, end, this->_sizes.begin() ); - } - - ndim_system() = delete; - - ndim_system( const self_t & ) = default; - - ndim_system( const std::vector & _sizes ) noexcept : - self_t( _sizes.cbegin(), _sizes.cend() ) {} - - ndim_system( std::size_t _dimensions, std::size_t max_value ) noexcept : - _sizes( _dimensions ) - { - std::fill_n( this->_sizes.begin(), _dimensions, max_value ); - } - - ndim_system( self_t &&original ) noexcept: _sizes( std::move( original._sizes ) ) {} - - ~ndim_system() {} - - self_t & operator=( const self_t &original ) = default; - - //self_t & operator=( self_t &&original ) = delete; - - inline std::size_t dimensions() const noexcept { - return _sizes.dimensions(); - } - - inline const_vector_reference get_sizes() const noexcept { - return this->_sizes; - } - 
-protected: - - storage_t _sizes; -}; + /** + * Describes a #dimensions()-dimensional system by storing its size along each dimension. + * + * It is meant to represent a grid of #dimensions() dimensions and size #get_sizes()[d] + * for each dimension \a d in the interval [0, #dimensions()). + * + * @tparam SizeType integral type to store the size of each dimension + * @tparam InternalVectorType internal vector type to store the sizes + */ + template< + typename SizeType, + typename InternalVectorType + > class NDimSystem { + + public: + static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); + + using VectorType = InternalVectorType; + using VectorReference = VectorType&; + using ConstVectorReference = const VectorType&; + using SelfType = NDimSystem< SizeType, InternalVectorType >; + + /** + * Construct a new NDimSystem object from an iterable range. + * + * The dimension is computed as \a std::distance(begin,end), i.e. + * \p IterT should be a random-access iterator for performance. + * + * @tparam IterT iterator type + * @param begin range begin + * @param end end of range + */ + template< typename IterT > NDimSystem( IterT begin, IterT end) noexcept : + _sizes( std::distance( begin, end ) ) /* NOTE(review): declared noexcept although _sizes is sized at run time; confirm InternalVectorType cannot throw here */ + { + std::copy( begin, end, this->_sizes.begin() ); + } + + /** + * Construct a new NDimSystem object from an std::vector<>, taking its values + * as system sizes and its length as number of dimensions.
+ */ + NDimSystem( const std::vector< size_t > &_sizes ) noexcept : + SelfType( _sizes.cbegin(), _sizes.cend() ) {} + + /** + * Construct a new NDimSystem object of \p _dimensions dimensions + * and with all sizes initialized to \p max_size + */ + NDimSystem( size_t _dimensions, size_t max_size ) noexcept : + _sizes( _dimensions ) + { + std::fill_n( this->_sizes.begin(), _dimensions, max_size ); + } + + NDimSystem() = delete; + + NDimSystem( const SelfType & ) = default; + + // NDimSystem( SelfType && ) = default; + + // NDimSystem( SelfType &&original ) noexcept: _sizes( std::move( original._sizes ) ) {} + NDimSystem( SelfType && ) = delete; + + ~NDimSystem() {} + + SelfType & operator=( const SelfType &original ) = default; + + SelfType & operator=( SelfType &&original ) = delete; + + inline size_t dimensions() const noexcept { + return _sizes.dimensions(); + } + + /** + * Get the sizes of the represented system as an iterable \p InternalVectorType + * object. + */ + inline ConstVectorReference get_sizes() const noexcept { + return this->_sizes; + } + + protected: + + InternalVectorType _sizes; + }; } // namespace geometry } // namespace utils } // namespace grb -#endif +#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM diff --git a/include/graphblas/utils/geometry/ndim_vector.hpp b/include/graphblas/utils/geometry/ndim_vector.hpp index 9c9ad3b6a..eca89137e 100644 --- a/include/graphblas/utils/geometry/ndim_vector.hpp +++ b/include/graphblas/utils/geometry/ndim_vector.hpp @@ -1,14 +1,12 @@ -#ifndef _NDIM_VECTOR_H_ -#define _NDIM_VECTOR_H_ +#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR +#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR #include #include -#include -#include -#include -#include #include +#include +#include #include "linearized_ndim_system.hpp" @@ -16,107 +14,127 @@ namespace grb { namespace utils { namespace geometry { -template< typename OutT, typename CoordsT, typename StorageT > class ndim_vector { - -public: - - using const_domain_vector_reference
= - typename linearized_ndim_system< CoordsT, StorageT >::const_vector_reference; - using domain_vector_storage = typename StorageT::const_vector_storage; - using domain_iterator = typename linearized_ndim_system< CoordsT, StorageT >::iterator; - -private: - - const linearized_ndim_system< CoordsT, StorageT > _linearizer; - OutT* data; - - inline std::size_t get_coordinate( domain_vector_storage coordinates ) const { - return this->_linearizer.ndim_to_linear( coordinates ); - } - - inline std::size_t get_coordinate( domain_iterator coordinates ) const { - return this->_linearizer.ndim_to_linear( coordinates ); - } - - void clean_mem() { - if ( this->data == nullptr ) { - delete[] this->data; - } - } - -public: - - ndim_vector() = delete; - - template< typename IterT > ndim_vector( IterT begin, IterT end): _linearizer( begin, end ) { - static_assert( std::is_default_constructible< OutT >::value, - "the stored type is not default constructible" ); - this->data = new OutT[ _linearizer.system_size() ]; - } - - ndim_vector( const std::vector & _sizes ): - ndim_vector( _sizes.cbegin(), _sizes.cend() ) {} - - // ndim_vector( const ndim_vector< OutT, CoordsT, StorageT >& original ): - // _linearizer( original._linearizer ) { - // this->data = new std::size_t[ original.data_size() ]; - // std::copy_n( original.data, original.data_size(), this->data ); - // } - ndim_vector( const ndim_vector< OutT, CoordsT, StorageT >& original ) = delete; - - - ndim_vector( ndim_vector< OutT, CoordsT, StorageT >&& original ) noexcept: - _linearizer( std::move( original._linearizer ) ) { - this->data = original.data; - original.data = nullptr; - } - // ndim_vector( ndim_vector< OutT, CoordsT, StorageT >&& original ) = delete; - - ndim_vector< OutT, CoordsT, StorageT >& operator=( - const ndim_vector< OutT, CoordsT, StorageT > &original ) = delete; - - ndim_vector< OutT, CoordsT, StorageT >& operator=( - ndim_vector< OutT, CoordsT, StorageT > &&original ) = delete; - - ~ndim_vector() { - 
this->clean_mem(); - } - - std::size_t dimensions() const { - return this->_linearizer.dimensions(); - } - - std::size_t data_size() const { - return this->_linearizer.system_size(); - } - - inline OutT& at( const_domain_vector_reference coordinates ) { - return this->data[ this->get_coordinate( coordinates.storage() ) ]; - } - - inline const OutT& at( const_domain_vector_reference coordinates ) const { - return this->data[ this->get_coordinate( coordinates.storage() ) ]; - } - - inline OutT& at( domain_vector_storage coordinates ) { - return this->data[ this->get_coordinate( coordinates ) ]; - } - - inline const OutT& at( domain_vector_storage coordinates ) const { - return this->data[ this->get_coordinate( coordinates ) ]; - } - - domain_iterator domain_begin() const { - return this->_linearizer.begin(); - } - - domain_iterator domain_end() const { - return this->_linearizer.end(); - } -}; + /** + * Maps an N-dimensional vector to an array of data. + * + * The user constructs an object by passing the sizes (as an N-dimensional vector) + * of the iteration space and accesses the stored data via an N-dimensional vector of coordinates. + * + * Example: if the user constructs an \p NDimVector with 3D sizes \a [2,3,4], she can access data + * via a 3D coordinates vector of ranges \a [0-1]x[0-2]x[0-3] (here \a x denoting the cartesian product) + * by using the #at() method. + * + * This facility allows associating a value of type \p DataType to, for example, + * each element of an N-dimensional grid. 
+ * + * @tparam DataType type of data stored in the array + * @tparam SizeType type for the components of the N-dimensional vector: + * the maximum number of stored data is thus \f$ std::numeric_limits< SizeType >::max()^N \f$ + * @tparam InternalVectorType storage type of the internal N-dimensional vector + */ + template< + typename DataType, + typename SizeType, + typename InternalVectorType + > class NDimVector { + + public: + static_assert( std::is_default_constructible< DataType >::value, + "the stored type is not default constructible" ); + static_assert( std::is_integral< SizeType >::value, "SizeType must be integral" ); + + using ConstDomainVectorReference = + typename LinearizedNDimSystem< SizeType, InternalVectorType >::ConstVectorReference; + using ConstDomainVectorStorageType = typename InternalVectorType::ConstVectorStorageType; + using DomainIterator = typename LinearizedNDimSystem< SizeType, InternalVectorType >::Iterator; + using Selftype = NDimVector< DataType, SizeType, InternalVectorType >; + + NDimVector() = delete; + + template< typename IterT > NDimVector( IterT begin, IterT end) : + _linearizer( begin, end ) + { + this->data = new DataType[ _linearizer.system_size() ]; /* one element per point of the linearized system */ + } + + NDimVector( const std::vector< size_t > &_sizes ) : + NDimVector( _sizes.cbegin(), _sizes.cend() ) {} + + NDimVector( const Selftype& original ): + _linearizer( original._linearizer ), + data( new DataType[ original.data_size() ] ) + { + std::copy_n( original.data, original.data_size(), this->data ); /* deep copy: each object owns its own array */ + } + + NDimVector( Selftype&& original ) noexcept: + _linearizer( std::move( original._linearizer ) ) + { + this->data = original.data; + original.data = nullptr; /* the moved-from object must not release the array it handed over */ + } + + Selftype& operator=( const Selftype &original ) = delete; + + Selftype& operator=( Selftype &&original ) = delete; + + ~NDimVector() { + this->clean_mem(); + } + + size_t dimensions() const { + return this->_linearizer.dimensions(); + } + + size_t data_size() const { + return this->_linearizer.system_size(); + } + +
inline DataType& at( ConstDomainVectorReference coordinates ) { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + inline const DataType& at( ConstDomainVectorReference coordinates ) const { + return this->data[ this->get_coordinate( coordinates.storage() ) ]; + } + + inline DataType& at( ConstDomainVectorStorageType coordinates ) { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + inline const DataType& at( ConstDomainVectorStorageType coordinates ) const { + return this->data[ this->get_coordinate( coordinates ) ]; + } + + DomainIterator domain_begin() const { + return this->_linearizer.begin(); + } + + DomainIterator domain_end() const { + return this->_linearizer.end(); + } + + private: + const LinearizedNDimSystem< SizeType, InternalVectorType > _linearizer; + DataType* data; + + inline size_t get_coordinate( ConstDomainVectorStorageType coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + inline size_t get_coordinate( DomainIterator coordinates ) const { + return this->_linearizer.ndim_to_linear( coordinates ); + } + + void clean_mem() { + if ( this->data != nullptr ) { /* release the owned array; data is nullptr only after being moved from */ + delete[] this->data; + } + } + }; } // namespace geometry } // namespace utils } // namespace grb -#endif // _NDIM_VECTOR_H_ +#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR diff --git a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp new file mode 100644 index 000000000..81864cb20 --- /dev/null +++ b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp @@ -0,0 +1,128 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file IteratorValueAdaptor.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Adaptor to extract a given value out of an iterator. + * @date 2022-10-08 + */ + +#ifndef H_GRB_UTILS_ITERATOR_VALUE_ADAPTOR +#define H_GRB_UTILS_ITERATOR_VALUE_ADAPTOR + +#include +#include +#include + +namespace grb { + namespace utils { + + /** + * Adaptor for an iterator, to extract the value pointed to by the * operator. + * It wraps an iterator under the same interface, using an object of type \a AdaptorType + * to adapt the returned value. + * + * @tparam InnerIterType type of the underlying iterator + * @tparam AdaptorType type of the adaptor, to be instantiated by default + */ + template< + typename InnerIterType, + typename AdaptorType + > struct IteratorValueAdaptor { + + static_assert( std::is_default_constructible< AdaptorType >::value, "AdaptorType must be default-constructible" ); + static_assert( std::is_copy_constructible< AdaptorType >::value, "AdaptorType must be copy-constructible" ); + static_assert( std::is_copy_assignable< AdaptorType >::value, "AdaptorType must be copy-assignable" ); + + typedef decltype( std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) ) reference; + typedef typename std::decay< reference >::type value_type; + typedef value_type * pointer; + typedef const value_type * const_pointer; + typedef typename std::iterator_traits< InnerIterType >::iterator_category iterator_category; + typedef typename std::iterator_traits< InnerIterType >::difference_type difference_type; + + static constexpr
bool is_random_access = std::is_base_of< + std::random_access_iterator_tag, iterator_category >::value; + + InnerIterType iter; + AdaptorType adaptor; + + using SelfType = IteratorValueAdaptor< InnerIterType, AdaptorType >; + + /** + * Construct a new Iterator Value Adaptor object from an actual iterator. + * The adaptor is built via its default constructor. + * + * @param _iter the underlying iterator, to be copied + */ + IteratorValueAdaptor( + const InnerIterType &_iter + ) : + iter( _iter ), + adaptor() {} + + /** + * Construct a new Iterator Value Adaptor object from an actual iterator. + * The adaptor is built via its default constructor. + * + * @param _iter the underlying iterator, to be moved + */ + IteratorValueAdaptor( + InnerIterType &&_iter + ) : + iter( std::move( _iter ) ), + adaptor() {} + + IteratorValueAdaptor() = delete; + + IteratorValueAdaptor( const SelfType & ) = default; + + IteratorValueAdaptor( SelfType && ) = default; + + SelfType& operator=( const SelfType & ) = default; + + SelfType& operator=( SelfType && ) = default; + + bool operator!=( const SelfType & o ) const { return o.iter != iter; } + + bool operator==( const SelfType & o ) const { return !
operator!=( o ); } + + reference operator*() { return adaptor( *iter ); } + + const reference operator*() const { return adaptor( *iter ); } + + pointer operator->() { return &adaptor( *iter ); } /* take the address of the adapted value; NOTE(review): requires the adaptor to return an lvalue reference */ + + const_pointer operator->() const { return &adaptor( *iter ); } + + SelfType& operator++() { ++iter; return *this; } + + template< bool RA = is_random_access, typename std::enable_if< RA, int >::type = 0 > SelfType & operator+=( const size_t offset ) { /* member template: the enable_if is only evaluated on use, so non-random-access iterators can still instantiate the struct */ + iter += offset; + return *this; + } + + template< bool RA = is_random_access, typename std::enable_if< RA, int >::type = 0 > difference_type operator-( const SelfType & other ) const { /* same guard as operator+= */ + return iter - other.iter; + } + }; + + } // end namespace utils +} // end namespace grb + +#endif // H_GRB_UTILS_ITERATOR_VALUE_ADAPTOR diff --git a/include/graphblas/utils/iterators/partition_range.hpp b/include/graphblas/utils/iterators/partition_range.hpp new file mode 100644 index 000000000..dd5f397c4 --- /dev/null +++ b/include/graphblas/utils/iterators/partition_range.hpp @@ -0,0 +1,71 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include #include #include + +#ifndef H_GRB_UTILS_PARTITION_RANGE +#define H_GRB_UTILS_PARTITION_RANGE + +namespace grb { + namespace utils { + + template< typename T > void partition_nonzeroes( + size_t num_procs, + size_t this_proc, + T num_elements, + T& first_offset, + T& last_offset + ) { + const T per_process = static_cast< T >( ( num_elements + static_cast< T >( num_procs ) - 1 ) / static_cast< T >( num_procs ) ); /* round up; the casts keep the arithmetic in T, so a T narrower than size_t no longer hits a narrowing brace-initialization */ + first_offset = std::min( per_process * static_cast< T >( this_proc ), num_elements ); /* clamp: trailing processes may get an empty range */ + last_offset = std::min( first_offset + per_process, num_elements ); + } + + template< typename IterT > void partition_iteration_range_on_procs( + size_t num_procs, + size_t this_proc, + size_t num_nonzeroes, + IterT &begin, + IterT &end + ) { + static_assert( std::is_base_of< std::random_access_iterator_tag, + typename std::iterator_traits< IterT >::iterator_category >::value, + "the given iterator is not a random access one" ); + assert( num_nonzeroes == static_cast< size_t >( end - begin ) ); + size_t first, last; + partition_nonzeroes( num_procs, this_proc, num_nonzeroes, first, last ); + if( last < num_nonzeroes ) { /* shrink end only when this process does not own the tail of the range */ + end = begin; + end += last; + } + begin += first; /* advance begin last: end was derived from the original begin */ + } + + template< typename IterT > void partition_iteration_range_on_procs( + size_t num_nonzeroes, + IterT &begin, + IterT &end + ) { + return partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), num_nonzeroes, begin, end ); + } + + } // namespace utils +} // namespace grb + +#endif // H_GRB_UTILS_PARTITION_RANGE diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 93c69d87e..2b544fb16 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -33,12 +33,13 @@ #include #include #include +#include #include -#include - -// #define TEST_ITER +//========== TRACE SOLVER STEPS ========= +// to easily trace the steps of the solver, just define this symbol +// #define HPCG_PRINT_STEPS // here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests #ifdef
HPCG_PRINT_STEPS @@ -53,10 +54,7 @@ #define DBG_println( args ) std::cout << args << std::endl; // forward declaration for the tracing facility -template< typename T, - class Ring = grb::Semiring< grb::operators::add< T >, grb::operators::mul< T >, grb::identities::zero, grb::identities::one > -> -void print_norm( const grb::Vector< T > &r, const char * head, const Ring &ring = Ring() ); +template< typename T > void print_norm( const grb::Vector< T > &r, const char * head ); /** * @brief prints \p head and the norm of \p r. @@ -65,11 +63,9 @@ void print_norm( const grb::Vector< T > &r, const char * head, const Ring &ring #endif #include +#include #include -#include -#include - #include #include @@ -100,10 +96,10 @@ using namespace algorithms; static const char * const TEXT_HIGHLIGHT = "===> "; #define thcout ( std::cout << TEXT_HIGHLIGHT ) #define thcerr ( std::cerr << TEXT_HIGHLIGHT ) - +#define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } /** - * @brief Container for system parameters to create the HPCG problem. + * Container for system parameters to create the HPCG problem. */ struct system_input { size_t nx, ny, nz; @@ -111,7 +107,7 @@ struct system_input { }; /** - * @brief Container for the parameters for the HPCG simulation. + * Container for the parameters for the HPCG simulation. */ struct simulation_input : public system_input { size_t test_repetitions; @@ -122,30 +118,32 @@ struct simulation_input : public system_input { bool print_iter_stats; }; +using IOType = double; +using NonzeroType = double; +using InputType = double; +using ResidualType = double; +using StdRing = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, + grb::identities::zero, grb::identities::one >; +using StdMinus = operators::subtract< NonzeroType >; +using coord_t = size_t; + /** - * @brief Containers for test outputs. + * Containers for test outputs. 
*/ struct output { - RC error_code; - size_t test_repetitions; - size_t performed_iterations; - double residual; + RC error_code = SUCCESS; + size_t test_repetitions = 0; + size_t performed_iterations = 0; + NonzeroType residual = 0.0; grb::utils::TimerResults times; - std::unique_ptr< PinnedVector< double > > pinnedVector; - double square_norm_diff; - - output() { - error_code = SUCCESS; - test_repetitions = 0; - performed_iterations = 0; - residual = 0.0; - } + std::unique_ptr< PinnedVector< IOType > > pinnedVector; + NonzeroType square_norm_diff = 0.0; /* initialized like the other scalar members, so it never holds an indeterminate value */ }; /** - * @brief Returns the closets power of 2 bigger or equal to \p n . + * Returns the closest power of 2 bigger or equal to \p n . */ -template< typename T = size_t > +template< typename T > T static next_pow_2( T n ) { static_assert( std::is_integral< T >::value, "Integral required." ); --n; @@ -157,49 +155,136 @@ T static next_pow_2( T n ) { return n + 1; } +using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, + StdRing, StdMinus >; +using mg_data_t = multigrid_data< IOType, NonzeroType >; +using coarsening_data_t = coarsening_data< IOType, NonzeroType >; +using smoothing_data_t = smoother_data< IOType >; +using hpcg_data_t = mg_cg_data< IOType, NonzeroType, InputType >; + /** - * @brief Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. + * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes.
* @return RC grb::SUCCESS if the system initialization within GraphBLAS succeeded */ -static RC build_3d_system( std::unique_ptr< hpcg_data< double, double, double > > & holder, const system_input & in ) { - struct hpcg_system_params< 3, double > params { +static void build_3d_system( + const system_input & in, + std::vector< std::unique_ptr< mg_data_t > > &system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, + std::unique_ptr< hpcg_data_t > &holder +) { + constexpr size_t DIMS = 3; + using builder_t = grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >; + const size_t pid { spmd<>::pid() }; + grb::utils::Timer timer; + + hpcg_system_params< 3, NonzeroType > params { { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; - return build_hpcg_system< 3, double >( holder, params ); + std::vector< builder_t > mg_generators; + MASTER_PRINT( pid, "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) + << " levels..." 
); + timer.reset(); + build_hpcg_multigrid_generators( params, mg_generators ); + double time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ); + MASTER_PRINT( pid, "built HPCG generators for " << mg_generators.size() + << " levels" << std::endl ); + + hpcg_data_t *data{ new hpcg_data_t( mg_generators[ 0 ].system_size() ) }; + holder = std::unique_ptr< hpcg_data_t >( data ); + + std::vector< size_t > mg_sizes; + // exclude main system + std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), + [] ( const builder_t &b ) { return b.system_size(); } ); + + MASTER_PRINT( pid, "allocating data for the MultiGrid simulation..."); + timer.reset(); + allocate_multigrid_data( mg_sizes, system_levels, coarsener_levels, smoother_levels ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + + // zero all vectors + MASTER_PRINT( pid, "zeroing all vectors..."); + timer.reset(); + data->zero_temp_vectors(); + std::for_each( system_levels.begin(), system_levels.end(), + []( std::unique_ptr< mg_data_t > &s) { s->zero_temp_vectors(); } ); + std::for_each( coarsener_levels.begin(), coarsener_levels.end(), + []( std::unique_ptr< coarsening_data_t > &s) { s->zero_temp_vectors(); } ); + std::for_each( smoother_levels.begin(), smoother_levels.end(), + []( std::unique_ptr< smoothing_data_t > &s) { s->zero_temp_vectors(); } ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + + assert( mg_generators.size() == system_levels.size() ); + assert( mg_generators.size() == smoother_levels.size() ); + assert( mg_generators.size() - 1 == coarsener_levels.size() ); + + for( size_t i = 0; i < mg_generators.size(); i++) { + MASTER_PRINT( pid, "SYSTEM LEVEL " << i << std::endl ); + MASTER_PRINT( pid, " populating system matrix: " ); + timer.reset(); + populate_system_matrix( mg_generators[ i ], system_levels.at(i)->A ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " 
<< time << std::endl ) + + MASTER_PRINT( pid, " populating smoothing data: " ); + timer.reset(); + populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ] ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + + if( i > 0 ) { + MASTER_PRINT( pid, " populating coarsening data: " ); + timer.reset(); + populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + } + } } #ifdef HPCG_PRINT_SYSTEM -static void print_system( const hpcg_data< double, double, double > & data ) { - print_matrix( data.A, 70, "A" ); - multi_grid_data< double, double > * coarser = data.coarser_level; - while( coarser != nullptr ) { - print_matrix( coarser->coarsening_matrix, 50, "COARSENING MATRIX" ); - print_matrix( coarser->A, 50, "COARSER SYSTEM MATRIX" ); - coarser = coarser->coarser_level; +static void print_system( + const std::vector< std::unique_ptr< mg_data_t > > &system_levels, + const std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels +) { + print_matrix( system_levels[ 0 ]->A, 70, "A" ); + for( size_t i = 0; i < coarsener_levels.size(); i++ ) { + print_matrix( coarsener_levels[i ] ->coarsening_matrix, 50, "COARSENING MATRIX" ); + print_matrix( system_levels[ i + 1 ]->A, 50, "COARSER SYSTEM MATRIX" ); } } #endif #ifdef HPCG_PRINT_STEPS -template< typename T, - class Ring = Semiring< grb::operators::add< T >, grb::operators::mul< T >, grb::identities::zero, grb::identities::one > - > -void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { +template< + typename T, + class Ring +> void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { T norm = 0; RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; (void)ret; assert( ret == SUCCESS ); if( head != nullptr ) { - std::cout << head << ": "; printf(">>> %s: %lf\n", head, norm ); } else { printf(">>> 
%lf\n", norm ); } } + +template< typename T > void print_norm( const grb::Vector< T > & r, const char * head ) { + return print_norm( r, head, StdRing() ); +} #endif + + + /** * @brief Main test, building an HPCG problem and running the simulation closely following the * parameters in the reference HPCG test. @@ -208,44 +293,49 @@ void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID const size_t pid { spmd<>::pid() }; assert( pid < spmd<>::nprocs() ); + if( pid == 0 ) { + thcout << "beginning input generation..." << std::endl; + } grb::utils::Timer timer; // assume successful run out.error_code = SUCCESS; - RC rc { SUCCESS }; // wrap hpcg_data inside a unique_ptr to forget about cleaning chores - std::unique_ptr< hpcg_data< double, double, double > > hpcg_state; - if( pid == 0 ) { - thcout << "beginning input generation..." << std::endl; - } + std::unique_ptr< hpcg_data_t > hpcg_state; + + hpcg_runner_t hpcg_runner( build_hpcg_runner< IOType, NonzeroType, InputType, ResidualType, + StdRing, StdMinus >( in.smoother_steps ) ); + auto &mg_runner = hpcg_runner.mg_runner; + auto &coarsener = mg_runner.coarsener_runner; + auto &smoother = mg_runner.smoother_runner; + hpcg_runner.cg_opts.max_iterations = in.max_iterations; + hpcg_runner.cg_opts.tolerance = 0.0; + hpcg_runner.cg_opts.with_preconditioning = ! in.no_preconditioning; + timer.reset(); - rc = build_3d_system( hpcg_state, in ); + build_3d_system( in, mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state ); double input_duration { timer.time() }; - if( rc != SUCCESS ) { - std::cerr << "Failure to generate the system (" << toString( rc ) << ")." 
<< std::endl; - out.error_code = rc; - return; - } if( pid == 0 ) { thcout << "input generation time (ms): " << input_duration << std::endl; } #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { - print_system( *hpcg_state ); + print_system( mg_runner.system_levels, coarsener.coarsener_levels ); } #endif - Matrix< double > & A { hpcg_state->A }; - Vector< double > & x { hpcg_state->x }; - Vector< double > & b { hpcg_state->b }; + Matrix< NonzeroType > & A { mg_runner.system_levels[ 0 ]->A }; + Vector< NonzeroType > & x { hpcg_state->x }; + Vector< NonzeroType > & b { hpcg_state->b }; + RC rc { SUCCESS }; // set vectors as from standard HPCG benchmark set( x, 1.0 ); set( b, 0.0 ); - rc = grb::mxv( b, A, x, grb::Semiring< grb::operators::add< double >, grb::operators::mul< double >, grb::identities::zero, grb::identities::one >() ); + rc = grb::mxv( b, A, x, StdRing() ); set( x, 0.0 ); #ifdef HPCG_PRINT_SYSTEM @@ -257,15 +347,15 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.preamble = timer.time(); - const bool with_preconditioning = ! in.no_preconditioning; + cg_out_data< NonzeroType > cg_out; + mg_data_t &grid_base = *mg_runner.system_levels[ 0 ]; if( in.evaluation_run ) { out.test_repetitions = 0; if( pid == 0 ) { thcout << "beginning evaluation run..." 
<< std::endl; } timer.reset(); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, - in.max_iterations, 0.0, out.performed_iterations, out.residual, false ); + rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); double single_time = timer.time(); if( rc == SUCCESS ) { rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); @@ -277,6 +367,8 @@ void grbProgram( const simulation_input & in, struct output & out ) { } out.times.useful = single_time; out.test_repetitions = static_cast< size_t >( 1000.0 / single_time ) + 1; + out.performed_iterations = cg_out.iterations; + out.residual = cg_out.norm_residual; if( pid == 0 ) { thcout << "Evaluation run" << std::endl; @@ -293,15 +385,17 @@ void grbProgram( const simulation_input & in, struct output & out ) { if( pid == 0 ) { thcout << "beginning cold run..." << std::endl; } + hpcg_runner.cg_opts.max_iterations = 1; timer.reset(); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, - 1, 0.0, out.performed_iterations, out.residual, false ); + rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); double iter_duration { timer.time() }; if( pid == 0 ) { thcout << "cold run duration (ms): " << iter_duration << std::endl; } + hpcg_runner.cg_opts.max_iterations = in.max_iterations; + hpcg_runner.cg_opts.print_iter_stats = in.print_iter_stats; // do benchmark for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { rc = set( x, 0.0 ); @@ -310,8 +404,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { thcout << "beginning iteration: " << i << std::endl; } timer.reset(); - rc = hpcg( *hpcg_state, with_preconditioning, in.smoother_steps, in.smoother_steps, - in.max_iterations, 0.0, out.performed_iterations, out.residual, in.print_iter_stats ); + rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); iter_duration = timer.time(); out.times.useful += iter_duration; if( pid == 0 ) { @@ -324,6 +417,9 @@ void grbProgram( const 
simulation_input & in, struct output & out ) { } out.times.useful /= static_cast< double >( in.test_repetitions ); + out.performed_iterations = cg_out.iterations; + out.residual = cg_out.norm_residual; + if( spmd<>::pid() == 0 ) { if( rc == SUCCESS ) { thcout << "repetitions, average time (ms): " << out.test_repetitions @@ -339,15 +435,13 @@ void grbProgram( const simulation_input & in, struct output & out ) { // set error code out.error_code = rc; - Semiring< grb::operators::add< double >, grb::operators::mul< double >, - grb::identities::zero, grb::identities::one > ring; grb::set( b, 1.0 ); out.square_norm_diff = 0.0; - grb::eWiseMul( b, -1.0, x, ring ); - grb::dot( out.square_norm_diff, b, b, ring ); + grb::eWiseMul( b, -1.0, x, StdRing() ); + grb::dot( out.square_norm_diff, b, b, StdRing() ); // output - out.pinnedVector = std::unique_ptr< PinnedVector< double > >( new PinnedVector< double >( x, SEQUENTIAL ) ); + out.pinnedVector = std::unique_ptr< PinnedVector< NonzeroType > >( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); // finish timing const double time_taken { timer.time() }; out.times.postamble = time_taken; @@ -358,26 +452,11 @@ void grbProgram( const simulation_input & in, struct output & out ) { */ static void parse_arguments( simulation_input &, size_t &, double &, int, char ** ); -#ifdef TEST_ITER -static void test_iters(); -static void test_iters2(); -#endif - -void test_system_iter(); - int main( int argc, char ** argv ) { simulation_input sim_in; size_t test_outer_iterations; double max_residual_norm; -#ifdef TEST_ITER - test_iters(); - test_iters2(); - return 0; -#endif - test_system_iter(); - // return 0; - parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; @@ -414,7 +493,8 @@ int main( int argc, char ** argv ) { grb::Benchmarker< AUTOMATIC > benchmarker; rc = benchmarker.exec( &grbProgram, sim_in, 
out, 1, test_outer_iterations, true ); ASSERT_RC_SUCCESS( rc ); - thcout << "Benchmark completed successfully and took " << out.performed_iterations << " iterations to converge with residual " << out.residual << std::endl; + thcout << "Benchmark completed successfully and took " << out.performed_iterations + << " iterations to converge with residual " << out.residual << std::endl; if( ! out.pinnedVector ) { thcerr << "no output vector to inspect" << std::endl; @@ -457,8 +537,7 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration "the execution of the algorithm)" ) .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, "number of pre/post-smoother steps; 0 disables smoothing" ) .add_option( "--evaluation-run", sim_in.evaluation_run, false, - "launch single run directly, without benchmarker (ignore " - "repetitions)" ) + "launch single run directly, without benchmarker (ignore repetitions)" ) .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, "on each iteration, print more statistics" ); @@ -494,217 +573,3 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration sim_in.max_iterations = 1; } } - - -void test_system_iter() { - constexpr size_t DIMS = 2; - using row_index_t = size_t; - std::array< row_index_t, DIMS > dims; - dims.fill( 4 ); - grb::utils::geometry::linearized_halo_ndim_system< row_index_t, DIMS > system( dims, 1 ); - grb::utils::geometry::linearized_halo_ndim_system< row_index_t, DIMS >::iterator begin = system.begin(); - - while( begin.has_more_elements() ) { - std::cout << "row " << begin->get_element_linear() << ": "; - while( begin.has_more_neighbours() ) { - std::cout << /* "-- " << */ begin->get_neighbor_linear() << " "; - begin.next_neighbour(); - } - std::cout << std::endl; - begin.next_element(); - } - - 
std::vector< size_t > colors, counters; - color_matrix_greedy( system, colors, counters ); - - std::cout << "final assignment:" << std::endl; - for( size_t i = 0; i < colors.size(); i++ ){ - std::cout << i << " -> " << colors[ i ] << ", "; - } - std::cout << std::endl; -} - - - -struct NZ { - size_t i; - size_t j; - double v; - - NZ( size_t _i, size_t _j, double _v ): i(_i), j(_j), v(_v) {} - - bool operator!=( const NZ& o ) const { - return i != o.i || j != o.j || v != o.v; - } -}; - -#ifdef TEST_ITER -static void test_iters() { - - using clock = std::chrono::steady_clock; - - constexpr size_t DIMS = 3; - using coord_t = size_t; - - std::array< coord_t, DIMS > finer_sizes{ 1024, 1024, 1024}; - std::array< coord_t, DIMS > coarser_sizes; - for( size_t i = 0; i < finer_sizes.size(); i++ ) { - coarser_sizes[ i ] = finer_sizes[ i ] / 2; - } - - size_t rows { std::accumulate( coarser_sizes.cbegin(), coarser_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; - - std::array< size_t, DIMS > lfiner_sizes{ 1024, 1024, 1024}; - std::array< size_t, DIMS > lcoarser_sizes{}; - for( size_t i = 0; i < lfiner_sizes.size(); i++ ) { - lcoarser_sizes[ i ] = lfiner_sizes[ i ] / 2; - } - grb::algorithms::old::coarsener_generator_iterator< DIMS, double > sbegin( lcoarser_sizes, lfiner_sizes, 0 ); - grb::algorithms::old::coarsener_generator_iterator< DIMS, double > send( lcoarser_sizes, lfiner_sizes, rows ); - - - using citer = hpcg_coarsener_builder< DIMS, coord_t, double >::hpcg_coarsener_iterator; - hpcg_coarsener_builder< DIMS, coord_t, double > coarsener( coarser_sizes, finer_sizes ); - citer pbegin( coarsener.make_begin_iterator() ); - const citer pend( coarsener.make_end_iterator() ); - - size_t num_elements = pend - pbegin; - std::cout << "number of elements: " << num_elements << std::endl; - - std::vector< NZ > svalues; - svalues.reserve( num_elements); - typename clock::time_point start( clock::now() ); - for( ; sbegin != send; ++sbegin ) { - // printf( "inserting %lu %lu\n", 
sbegin.i(), sbegin.j() ); - svalues.emplace_back( sbegin.i(), sbegin.j(), sbegin.v() ); - } - typename clock::time_point finish( clock::now() ); - std::cout << "sequential generation time (ms): " << - std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; - - - - - const size_t nthreads = omp_get_max_threads(); - size_t per_thread_num = ( num_elements + nthreads - 1 ) / nthreads; - std::vector< std::vector< NZ > > tvalues( nthreads ); - for( size_t i = 0; i < nthreads; i++ ) { - tvalues[i].reserve( per_thread_num ); - } - start = clock::now(); - #pragma omp parallel - { - - int t = omp_get_thread_num(); - std::vector< NZ > &tv = tvalues[ t ]; - // printf( "thread %d, size %lu\n", t, tv.size() ); - #pragma omp for schedule( static ) - for( auto it = pbegin; it != pend; ++it ) { - tv.emplace_back( it.i(), it.j(), it.v() ); - // printf( "thread %d: inserting %lu %lu\n", t, it.i(), it.j() ); - } - } - finish = clock::now(); - std::cout << "parallel generation time (ms): " << - std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; - - std::vector< NZ > pvalues; - for( const std::vector< NZ > &tv: tvalues ) { - pvalues.insert( pvalues.end(), tv.cbegin(), tv.cend() ); - } - - - if( svalues.size() != pvalues.size() ) { - std::cout << "different sizes!" 
<< std::endl; - std::exit(-1); - } - - for( size_t i = 0; i < svalues.size(); i++ ) { - if( svalues[i] != pvalues[i] ) { - std::cout << "error at position " << i << std::endl; - } - } - std::cout << "all OK" << std::endl; -} - -static void test_iters2() { - - using clock = std::chrono::steady_clock; - using coord_t = size_t; - - constexpr size_t DIMS = 3, halo_size = 1; - constexpr double diag_value = 26.0, non_diag_value = -1.0; - - std::array< coord_t, DIMS > sys_sizes{ 64, 64, 64}; - size_t n { std::accumulate( sys_sizes.cbegin(), sys_sizes.cend(), 1UL, std::multiplies< size_t >() ) }; - - std::array< size_t, DIMS > large_sys_sizes{ 64, 64, 64}; - old::matrix_generator_iterator< DIMS, double > sbegin( large_sys_sizes, 0UL, halo_size, diag_value, non_diag_value ); - old::matrix_generator_iterator< DIMS, double > send( large_sys_sizes, n, halo_size, diag_value, non_diag_value ); - - hpcg_builder< DIMS, coord_t, double > hpcg_system( sys_sizes, halo_size ); - matrix_generator_iterator< DIMS, coord_t, double > pbegin( - hpcg_system.make_begin_iterator( diag_value, non_diag_value ) ); - matrix_generator_iterator< DIMS, coord_t, double > pend( - hpcg_system.make_end_iterator( diag_value, non_diag_value ) - ); - - size_t num_elements = pend - pbegin; - std::cout << "number of elements: " << num_elements << std::endl; - - std::vector< NZ > svalues; - svalues.reserve( num_elements); - typename clock::time_point start( clock::now() ); - for( ; sbegin != send; ++sbegin ) { - svalues.emplace_back( sbegin.i(), sbegin.j(), sbegin.v() ); - } - typename clock::time_point finish( clock::now() ); - std::cout << "sequential generation time (ms): " << - std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; - - - - - const size_t nthreads = omp_get_max_threads(); - size_t per_thread_num = ( num_elements + nthreads - 1 ) / nthreads; - std::vector< std::vector< NZ > > tvalues( nthreads ); - for( size_t i = 0; i < nthreads; i++ ) { - tvalues[i].reserve( 
per_thread_num ); - } - start = clock::now(); - #pragma omp parallel - { - - int t = omp_get_thread_num(); - std::vector< NZ > &tv = tvalues[ t ]; - // printf( "thread %d, size %lu\n", t, tv.size() ); - #pragma omp for schedule( static ) - for( auto it = pbegin; it != pend; ++it ) { - tv.emplace_back( it.i(), it.j(), it.v() ); - // printf( "thread %d: inserting %lu %lu\n", t, it.i(), it.j() ); - } - } - finish = clock::now(); - std::cout << "parallel generation time (ms): " << - std::chrono::duration< double, std::milli >( finish - start ).count() << std::endl; - - std::vector< NZ > pvalues; - for( const std::vector< NZ > &tv: tvalues ) { - pvalues.insert( pvalues.end(), tv.cbegin(), tv.cend() ); - } - - - if( svalues.size() != pvalues.size() ) { - std::cout << "different sizes!" << std::endl; - std::exit(-1); - } - - for( size_t i = 0; i < svalues.size(); i++ ) { - if( svalues[i] != pvalues[i] ) { - std::cout << "error at position " << i << std::endl; - } - } - - std::cout << "all OK" << std::endl; -} -#endif // TEST_ITER From 8411dabe76167696f6b89f47b9236df12ed7b841 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 18 Nov 2022 14:58:08 +0100 Subject: [PATCH 08/28] reorganizing code in HPCG test to be more concise and clearer: - separating iterator for hpcg system generation and for coarsening to different files - little cleanups to geometry classes - renaming utils/geometry to utils/multigrid to more clearly indicate the intended usage --- ...rix_builders.hpp => coarsener_builder.hpp} | 270 +++----------- .../graphblas/algorithms/hpcg/coloring.hpp | 8 +- include/graphblas/algorithms/hpcg/hpcg.hpp | 6 +- .../algorithms/hpcg/system_builder.hpp | 152 ++++++++ .../algorithms/hpcg/system_building_utils.hpp | 187 +++++----- .../multigrid/multigrid_building_utils.hpp | 2 +- .../array_vector_storage.hpp | 26 +- .../dynamic_vector_storage.hpp | 11 +- .../halo_matrix_generator_iterator.hpp | 29 +- .../linearized_halo_ndim_geometry.hpp | 113 +++--- 
.../linearized_halo_ndim_iterator.hpp | 133 +------ .../linearized_halo_ndim_system.hpp | 25 +- .../linearized_ndim_iterator.hpp | 24 +- .../linearized_ndim_system.hpp | 26 +- .../{geometry => multigrid}/ndim_system.hpp | 19 +- .../{geometry => multigrid}/ndim_vector.hpp | 11 +- tests/smoke/hpcg.cpp | 349 ++++++++---------- 17 files changed, 593 insertions(+), 798 deletions(-) rename include/graphblas/algorithms/hpcg/{ndim_matrix_builders.hpp => coarsener_builder.hpp} (50%) create mode 100644 include/graphblas/algorithms/hpcg/system_builder.hpp rename include/graphblas/utils/{geometry => multigrid}/array_vector_storage.hpp (78%) rename include/graphblas/utils/{geometry => multigrid}/dynamic_vector_storage.hpp (94%) rename include/graphblas/utils/{geometry => multigrid}/halo_matrix_generator_iterator.hpp (89%) rename include/graphblas/utils/{geometry => multigrid}/linearized_halo_ndim_geometry.hpp (81%) rename include/graphblas/utils/{geometry => multigrid}/linearized_halo_ndim_iterator.hpp (63%) rename include/graphblas/utils/{geometry => multigrid}/linearized_halo_ndim_system.hpp (82%) rename include/graphblas/utils/{geometry => multigrid}/linearized_ndim_iterator.hpp (91%) rename include/graphblas/utils/{geometry => multigrid}/linearized_ndim_system.hpp (91%) rename include/graphblas/utils/{geometry => multigrid}/ndim_system.hpp (88%) rename include/graphblas/utils/{geometry => multigrid}/ndim_vector.hpp (95%) diff --git a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp b/include/graphblas/algorithms/hpcg/coarsener_builder.hpp similarity index 50% rename from include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp rename to include/graphblas/algorithms/hpcg/coarsener_builder.hpp index 5958ecb0d..2ee848039 100644 --- a/include/graphblas/algorithms/hpcg/ndim_matrix_builders.hpp +++ b/include/graphblas/algorithms/hpcg/coarsener_builder.hpp @@ -15,42 +15,17 @@ * limitations under the License. 
*/ -/** - * @file ndim_matrix_builders.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions - * - * In particular, the main matrices are: - * - a system matrix, generated from an N-dimenional space of coordinates by iterating along - * each dimension in priority order, where the first dimension has highest priority and the last - * dimension least priority; for each point (row), all its N-dimensional neighbours within - * a given distance are generated for the column - * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting - * each point to a corresponding system of finer sizes - * - * @date 2021-04-30 - */ - -#ifndef _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS -#define _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS +#ifndef _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER +#define _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include #include +#include +#include -#include - - - +#include +#include namespace grb { namespace algorithms { @@ -60,133 +35,7 @@ namespace grb { typename CoordType, typename ValueType > - class HPCGBuilder { - public: - - struct HPCGDiagGenerator { - - ValueType _diag; - ValueType _non_diag; - - HPCGDiagGenerator( - ValueType diag, - ValueType non_diag - ) : _diag( diag ), - _non_diag( non_diag ) {} - - HPCGDiagGenerator & operator=( const HPCGDiagGenerator & ) = default; - - inline ValueType operator()( const CoordType &i, const CoordType &j ) const noexcept { - return j == i ? 
_diag: _non_diag; - } - }; - - using HaloSystemType = grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS >; - using Iterator = geometry::HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, HPCGDiagGenerator >; - - HPCGBuilder( - const std::array< CoordType, DIMS > &sizes, - CoordType _halo, - ValueType diag, - ValueType non_diag - ) : - halo( _halo ), - system( sizes, _halo ), - _diag_generator( diag, non_diag ) - { - if( _halo <= 0 ) { - throw std::invalid_argument( "halo should be higher than 0" ); - } - for( const auto i : sizes ) { - if( i < 2 * _halo + 1 ) { - throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); - } - } - } - - - HPCGBuilder( const HPCGBuilder< DIMS, CoordType, ValueType > & ) = default; - - HPCGBuilder( HPCGBuilder< DIMS, CoordType, ValueType > && ) = default; - - HPCGBuilder< DIMS, CoordType, ValueType > & operator=( const HPCGBuilder< DIMS, CoordType, ValueType > & ) = default; - - HPCGBuilder< DIMS, CoordType, ValueType > & operator=( HPCGBuilder< DIMS, CoordType, ValueType > && ) = default; - - size_t system_size() const { - return system.base_system_size(); - } - - size_t num_neighbors() const { - return system.halo_system_size(); - } - - const HaloSystemType & get_generator() const { - return system; - } - - Iterator make_begin_iterator() const { - return Iterator( system, _diag_generator ); - } - - Iterator make_end_iterator() const { - Iterator result( system, _diag_generator ); - result += num_neighbors() - 1; // do not trigger boundary checks - ++result; - return result; - } - - ValueType get_diag_value() const { - return _diag_generator._diag; - } - - ValueType get_non_diag_value() const { - return _diag_generator._non_diag; - } - - - private: - const CoordType halo; - HaloSystemType system; - HPCGDiagGenerator _diag_generator; - }; - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - template< - size_t DIMS, - typename CoordType, - typename T - > - class hpcg_coarsener_builder; - + 
class HPCGCoarsenerBuilder; /** * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. @@ -204,27 +53,24 @@ namespace grb { template< size_t DIMS, typename CoordType, - typename T - > - struct coarsener_generator_iterator { + typename ValueType + > struct HPCGCoarsenerGeneratorIterator { - friend hpcg_coarsener_builder< DIMS, CoordType, T >; + friend HPCGCoarsenerBuilder< DIMS, CoordType, ValueType >; using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; - using ValueType = T; + using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; + using LinearSystemIterType = typename LinearSystemType::Iterator; + using SelfType = HPCGCoarsenerGeneratorIterator< DIMS, CoordType, ValueType >; + using ArrayType = std::array< CoordType, DIMS >; - using lin_system_t = grb::utils::geometry::LinearizedNDimSystem< CoordType, - grb::utils::geometry::ArrayVectorStorage< CoordType, DIMS > >; - using __iter_t = typename lin_system_t::Iterator; - using SelfType = coarsener_generator_iterator< DIMS, CoordType, T >; - using array_t = std::array< CoordType, DIMS >; - - struct __value { + struct _HPCGValueGenerator { friend SelfType; - __value( + _HPCGValueGenerator( RowIndexType i, ColumnIndexType j ) noexcept : @@ -232,9 +78,9 @@ namespace grb { _j( j ) {} - __value( const __value & ) = default; + _HPCGValueGenerator( const _HPCGValueGenerator & ) = default; - __value & operator=( const __value & ) = default; + _HPCGValueGenerator & operator=( const _HPCGValueGenerator & ) = default; inline RowIndexType i() const { return _i; } inline ColumnIndexType j() const { return _j; } @@ -249,14 +95,14 @@ namespace grb { // interface for std::random_access_iterator using iterator_category = std::random_access_iterator_tag; - using value_type = __value; + using value_type = _HPCGValueGenerator; using pointer = const value_type; using 
reference = const value_type&; - using difference_type = typename __iter_t::difference_type; + using difference_type = typename LinearSystemIterType::difference_type; - coarsener_generator_iterator( const SelfType & o ) = default; + HPCGCoarsenerGeneratorIterator( const SelfType &o ) = default; - coarsener_generator_iterator( SelfType && o ) = default; + HPCGCoarsenerGeneratorIterator( SelfType &&o ) = default; SelfType & operator=( const SelfType & ) = default; @@ -333,20 +179,16 @@ namespace grb { return _val.v(); } - const __iter_t & it() const { - return this->_sys_iter; - } - private: //// incremented when incrementing the row coordinates; is is the ration between //// #finer_sizes and row_generator#physical_sizes - const lin_system_t *_lin_sys; - const array_t *_steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - __iter_t _sys_iter; + const LinearSystemType *_lin_sys; + const ArrayType *_steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be + LinearSystemIterType _sys_iter; value_type _val; /** - * @brief Construct a new \c coarsener_generator_iterator object from the coarser and finer sizes, + * @brief Construct a new \c HPCGCoarsenerGeneratorIterator object from the coarser and finer sizes, * setting its row at \p _current_row and the column at the corresponding value. 
* * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the @@ -356,9 +198,9 @@ namespace grb { * @param _finer_sizes sizes of the finer system (columns) * @param _current_row row (in the coarser system) to set the iterator on */ - coarsener_generator_iterator( - const lin_system_t &system, - const array_t &steps + HPCGCoarsenerGeneratorIterator( + const LinearSystemType &system, + const ArrayType &steps ) noexcept : _lin_sys( &system ), _steps( &steps ), @@ -389,70 +231,66 @@ namespace grb { } }; - template< size_t DIMS, typename CoordType, - typename T - > - class hpcg_coarsener_builder { + typename ValueType + > class HPCGCoarsenerBuilder { public: + using ArrayType = std::array< CoordType, DIMS >; + using Iterator = HPCGCoarsenerGeneratorIterator< DIMS, CoordType, ValueType >; + using SelfType = HPCGCoarsenerBuilder< DIMS, CoordType, ValueType >; - using array_t = std::array< CoordType, DIMS >; - using hpcg_coarsener_iterator = coarsener_generator_iterator< DIMS, CoordType, T >; - - hpcg_coarsener_builder( - const array_t &_coarser_sizes, - const array_t &_finer_sizes + HPCGCoarsenerBuilder( + const ArrayType &_finer_sizes, + const ArrayType &_coarser_sizes ) : system( _coarser_sizes.begin(), _coarser_sizes.end() ) { for( size_t i { 0 }; i < DIMS; i++ ) { // finer size MUST be an exact multiple of coarser_size - size_t step { _finer_sizes[ i ] / _coarser_sizes[ i ] }; - if( step == 0 || _finer_sizes[ i ] / step != _coarser_sizes[ i ] ) { + std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); + if( ratio.quot < 2 || ratio.rem != 0 ) { throw std::invalid_argument( std::string( "finer size of dimension " ) + std::to_string( i ) + std::string( "is not an exact multiple of coarser size" ) ); } - steps[ i ] = step; + steps[ i ] = ratio.quot; } } - hpcg_coarsener_builder( const hpcg_coarsener_builder< DIMS, CoordType, T> & ) = delete; + HPCGCoarsenerBuilder( const SelfType & ) = delete; - hpcg_coarsener_builder( 
hpcg_coarsener_builder< DIMS, CoordType, T> && ) = delete; + HPCGCoarsenerBuilder( SelfType && ) = delete; - hpcg_coarsener_builder< DIMS, CoordType, T> & operator=( const hpcg_coarsener_builder< DIMS, CoordType, T> & ) = delete; + SelfType & operator=( const SelfType & ) = delete; - hpcg_coarsener_builder< DIMS, CoordType, T> & operator=( hpcg_coarsener_builder< DIMS, CoordType, T> && ) = delete; + SelfType & operator=( SelfType && ) = delete; size_t system_size() const { return system.system_size(); } - hpcg_coarsener_iterator make_begin_iterator() { - return hpcg_coarsener_iterator( system, steps ); + Iterator make_begin_iterator() { + return Iterator( system, steps ); } - hpcg_coarsener_iterator make_end_iterator() { - hpcg_coarsener_iterator result( system, steps ); + Iterator make_end_iterator() { + Iterator result( system, steps ); result += system_size() - 1; // do not trigger boundary checks ++result; return result; } private: - const grb::utils::geometry::LinearizedNDimSystem< CoordType, - grb::utils::geometry::ArrayVectorStorage< CoordType, DIMS > > system; + const grb::utils::multigrid::LinearizedNDimSystem< CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; - array_t steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be + ArrayType steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be //// incremented when incrementing the row coordinates; is is the ration between //// #finer_sizes and row_generator#physical_sizes }; - } // namespace algorithms } // namespace grb - -#endif // _H_GRB_ALGORITHMS_NDIM_MATRIX_BUILDERS +#endif // _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER diff --git a/include/graphblas/algorithms/hpcg/coloring.hpp b/include/graphblas/algorithms/hpcg/coloring.hpp index f9334afb3..f5793b6ca 100644 --- a/include/graphblas/algorithms/hpcg/coloring.hpp +++ b/include/graphblas/algorithms/hpcg/coloring.hpp @@ -22,7 +22,7 @@ #include #include -#include +#include namespace grb { namespace algorithms { @@ -62,8 +62,8 @@ namespace grb { template< size_t DIMS, typename CoordType - > void color_matrix_greedy( - const grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS > &system, + > void hpcg_greedy_color_ndim_system( + const grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType > &system, std::vector< CoordType > &row_colors, std::vector< CoordType > &color_counters, bool reorder_rows_per_color = false @@ -78,7 +78,7 @@ namespace grb { row_colors[0] = 0; // first point gets color 0 // Finds colors in a greedy (a likely non-optimal) fashion. - typename grb::utils::geometry::LinearizedHaloNDimSystem< CoordType, DIMS >::Iterator begin = system.begin(); + typename grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >::Iterator begin = system.begin(); begin.next_element(); // skip first row while( begin.has_more_elements() ) { diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index 2d30584fe..9d65aa79f 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -15,8 +15,8 @@ * limitations under the License. 
*/ -#ifndef _H_GRB_ALGORITHMS_HPCG -#define _H_GRB_ALGORITHMS_HPCG +#ifndef _H_GRB_ALGORITHMS_HPCG_HPCG +#define _H_GRB_ALGORITHMS_HPCG_HPCG #include @@ -71,4 +71,4 @@ namespace grb { } // namespace algorithms } // namespace grb -#endif // _H_GRB_ALGORITHMS_HPCG +#endif // _H_GRB_ALGORITHMS_HPCG_HPCG diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp b/include/graphblas/algorithms/hpcg/system_builder.hpp new file mode 100644 index 000000000..700718e3b --- /dev/null +++ b/include/graphblas/algorithms/hpcg/system_builder.hpp @@ -0,0 +1,152 @@ + +/* + * Copyright 2021 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * @file system_builder.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions + * + * In particular, the main matrices are: + * - a system matrix, generated from an N-dimensional space of coordinates by iterating along + * each dimension in priority order, where the first dimension has highest priority and the last + * dimension least priority; for each point (row), all its N-dimensional neighbours within + * a given distance are generated for the column + * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting + * each point to a corresponding system of finer sizes + * + * @date 2021-04-30 + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER +#define _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace grb { + namespace algorithms { + + template< + size_t DIMS, + typename CoordType, + typename ValueType + > class HPCGSystemBuilder { + public: + struct HPCGDiagGenerator { + + HPCGDiagGenerator( + ValueType diag, + ValueType non_diag + ) noexcept : + _diag( diag ), + _non_diag( non_diag ) {} + + HPCGDiagGenerator & operator=( const HPCGDiagGenerator & ) = default; + + inline ValueType operator()( const CoordType &i, const CoordType &j ) const noexcept { + return j == i ? 
_diag: _non_diag; + } + + ValueType _diag; + ValueType _non_diag; + }; + + using HaloSystemType = grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >; + using Iterator = grb::utils::multigrid::HaloMatrixGeneratorIterator< DIMS, CoordType, + ValueType, HPCGDiagGenerator >; + + HPCGSystemBuilder( + const std::array< CoordType, DIMS > &sizes, + CoordType halo, + ValueType diag, + ValueType non_diag + ) : + _system( sizes, halo ), + _diag_generator( diag, non_diag ) + { + if( halo <= 0 ) { + throw std::invalid_argument( "halo should be higher than 0" ); + } + for( const auto i : sizes ) { + if( i < 2 * halo + 1 ) { + throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); + } + } + } + + HPCGSystemBuilder( const HPCGSystemBuilder< DIMS, CoordType, ValueType > & ) = default; + + HPCGSystemBuilder( HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; + + HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( const HPCGSystemBuilder< DIMS, CoordType, ValueType > & ) = default; + + HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; + + size_t system_size() const { + return _system.base_system_size(); + } + + size_t num_neighbors() const { + return _system.halo_system_size(); + } + + const HaloSystemType & get_generator() const { + return _system; + } + + Iterator make_begin_iterator() const { + return Iterator( _system, _diag_generator ); + } + + Iterator make_end_iterator() const { + Iterator result( _system, _diag_generator ); + result += num_neighbors() - 1; // do not trigger boundary checks + ++result; + return result; + } + + ValueType get_diag_value() const { + return _diag_generator._diag; + } + + ValueType get_non_diag_value() const { + return _diag_generator._non_diag; + } + + + private: + HaloSystemType _system; + HPCGDiagGenerator _diag_generator; + }; + + } // namespace algorithms +} // namespace grb + +#endif // 
_H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER + diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 7a8db963d..088bb9fb3 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -37,14 +37,73 @@ #include #include -#include "ndim_matrix_builders.hpp" - +#include "system_builder.hpp" +#include "coarsener_builder.hpp" #include "coloring.hpp" namespace grb { namespace algorithms { - template< typename CoordType > void split_rows_by_color( + /** + * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and + * coarsening information. + * + * @tparam DIMS dimensions of the physical system + * @tparam T type of matrix values + */ + template< + size_t DIMS, + typename NonzeroType + > struct hpcg_system_params { + std::array< size_t, DIMS > physical_sys_sizes; + size_t halo_size; + NonzeroType diag_value; + NonzeroType non_diag_value; + size_t min_phys_size; + size_t max_levels; + size_t coarsening_step; + }; + + template< + size_t DIMS, + typename CoordType, + typename NonzeroType + > void hpcg_build_multigrid_generators( + const hpcg_system_params< DIMS, NonzeroType > ¶ms, + std::vector< grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > > &mg_generators + ) { + static_assert( DIMS > 0, "DIMS must be > 0" ); + + size_t const current_size{ std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, + std::multiplies< size_t >() ) }; + if( current_size > std::numeric_limits< CoordType >::max() ) { + throw std::domain_error( "CoordT cannot store the matrix coordinates" ); + } + size_t min_physical_size { *std::min_element( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend() ) }; + if( min_physical_size < params.min_phys_size ) { + throw std::domain_error( "the initial system is too small" ); + } + + 
std::array< CoordType, DIMS > coord_sizes; + // type-translate coordinates + std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), coord_sizes.begin() ); + + // generate hierarchical coarseners + for( size_t coarsening_level = 0UL; + min_physical_size >= params.min_phys_size && coarsening_level <= params.max_levels; + coarsening_level++ ) { + + // build generator + mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, params.non_diag_value ); + + // prepare for new iteration + min_physical_size /= params.coarsening_step; + std::for_each( coord_sizes.begin(), coord_sizes.end(), + [ &params ]( CoordType &v ){ v /= params.coarsening_step; }); + } + } + + template< typename CoordType > void hpcg_split_rows_by_color( const std::vector< CoordType > & row_colors, size_t num_colors, std::vector< std::vector< CoordType > > & per_color_rows @@ -57,11 +116,11 @@ namespace grb { template < size_t DIMS, - typename coord_t, + typename CoordType, typename NonzeroType, enum grb::Backend B - > grb::RC populate_system_matrix( - const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &system_generator, + > grb::RC hpcg_populate_system_matrix( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, grb::Matrix< NonzeroType, B > &M ) { const size_t pid { spmd<>::pid() }; @@ -69,30 +128,29 @@ namespace grb { if( pid == 0) { std::cout << "- generating system matrix..."; } - typename grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >::Iterator begin( + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator begin( system_generator.make_begin_iterator() ); - typename grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >::Iterator end( + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator end( system_generator.make_end_iterator() ); grb::utils::partition_iteration_range_on_procs( system_generator.num_neighbors(), begin, 
end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } - template< - typename coord_t, size_t DIMS, + typename CoordType, typename IOType, typename NonzeroType - > grb::RC populate_coarsener( - const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &finer_system_generator, - const grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType > &coarser_system_generator, + > grb::RC hpcg_populate_coarsener( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, coarsening_data< IOType, NonzeroType > &coarsener ) { static_assert( DIMS > 0, "DIMS must be > 0" ); - const std::array< coord_t, DIMS > &finer_sizes = finer_system_generator.get_generator().get_sizes(); - const std::array< coord_t, DIMS > &coarser_sizes = coarser_system_generator.get_generator().get_sizes(); + const std::array< CoordType, DIMS > &finer_sizes = finer_system_generator.get_generator().get_sizes(); + const std::array< CoordType, DIMS > &coarser_sizes = coarser_system_generator.get_generator().get_sizes(); const size_t finer_size = finer_system_generator.system_size(); const size_t coarser_size = coarser_system_generator.system_size(); @@ -105,21 +163,15 @@ namespace grb { assert( finer_sizes.size() == coarser_sizes.size() ); - for( size_t i { 0 }; i < coarser_sizes.size(); i++ ) { - std::ldiv_t ratio = std::ldiv( finer_sizes[ i ], coarser_sizes[ i ] ); - if( ratio.quot < 2 || ratio.rem != 0 ) { - throw std::invalid_argument( "finer sizes should be a multiple of coarser sizes" ); - } - } grb::Matrix< NonzeroType > &M = coarsener.coarsening_matrix; if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { throw std::invalid_argument( "wrong matrix dimensions: matrix should be rectangular" " with rows == and cols == " ); } - grb::algorithms::hpcg_coarsener_builder< DIMS, coord_t, NonzeroType > coarsener_builder( coarser_sizes, 
finer_sizes ); - grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, NonzeroType > begin( coarsener_builder.make_begin_iterator() ); - grb::algorithms::coarsener_generator_iterator< DIMS, coord_t, NonzeroType > end( coarsener_builder.make_end_iterator() ); + grb::algorithms::HPCGCoarsenerBuilder< DIMS, CoordType, NonzeroType > coarsener_builder( finer_sizes, coarser_sizes ); + grb::algorithms::HPCGCoarsenerGeneratorIterator< DIMS, CoordType, NonzeroType > begin( coarsener_builder.make_begin_iterator() ); + grb::algorithms::HPCGCoarsenerGeneratorIterator< DIMS, CoordType, NonzeroType > end( coarsener_builder.make_end_iterator() ); grb::utils::partition_iteration_range_on_procs( coarsener_builder.system_size(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } @@ -196,7 +248,7 @@ namespace grb { * @return grb::RC the success value returned when trying to build the vector */ template< enum grb::Backend B > - grb::RC build_static_color_masks( + grb::RC hpcg_build_static_color_masks( size_t matrix_size, const std::vector< std::vector< size_t > > &per_color_rows, std::vector< grb::Vector< bool, B > > & masks @@ -206,7 +258,7 @@ namespace grb { } for( size_t i = 0; i < per_color_rows.size(); i++ ) { const std::vector< size_t > & rows = per_color_rows[ i ]; - /* +#ifdef _DEBUG { std::cout << "\ncolor " << i << std::endl; for( size_t row : rows ) { @@ -214,7 +266,7 @@ namespace grb { } std::cout << std::endl; } - */ +#endif masks.emplace_back( matrix_size ); grb::Vector< bool > & output_mask = masks.back(); std::vector< size_t >::const_iterator begin = rows.cbegin(); @@ -227,7 +279,7 @@ namespace grb { << toString( rc ) << std::endl; return rc; } - /* +#ifdef _DEBUG { std::cout << "mask color " << i << std::endl; size_t count = 0; @@ -238,7 +290,7 @@ namespace grb { } std::cout << std::endl; } - */ +#endif } return grb::SUCCESS; } @@ -246,12 +298,12 @@ namespace grb { } // namespace internal template< - typename coord_t, size_t 
DIMS, - typename T - > grb::RC populate_smoothing_data( - const grb::algorithms::HPCGBuilder< DIMS, coord_t, T > &system_generator, - smoother_data< T > &smoothing_info + typename CoordType, + typename NonzeroType + > grb::RC hpcg_populate_smoothing_data( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, + smoother_data< NonzeroType > &smoothing_info ) { const size_t pid { spmd<>::pid() }; @@ -266,10 +318,10 @@ namespace grb { if( pid == 0 ) { std::cout << "- running coloring heuristics..."; } - std::vector< coord_t > colors, color_counters; - color_matrix_greedy( system_generator.get_generator(), colors, color_counters ); - std::vector< std::vector< coord_t > > per_color_rows; - split_rows_by_color( colors, color_counters.size(), per_color_rows ); + std::vector< CoordType > colors, color_counters; + hpcg_greedy_color_ndim_system( system_generator.get_generator(), colors, color_counters ); + std::vector< std::vector< CoordType > > per_color_rows; + hpcg_split_rows_by_color( colors, color_counters.size(), per_color_rows ); if( rc != grb::SUCCESS ) { if( pid == 0 ) { std::cout << "error: " << __LINE__ << std::endl; @@ -280,67 +332,10 @@ namespace grb { std::cout <<"- found " << color_counters.size() << " colors," << " generating color masks..."; } - return internal::build_static_color_masks( system_generator.system_size(), + return internal::hpcg_build_static_color_masks( system_generator.system_size(), per_color_rows, smoothing_info.color_masks ); } - /** - * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and - * coarsening information. 
- * - * @tparam DIMS dimensions of the physical system - * @tparam T type of matrix values - */ - template< size_t DIMS, typename T > - struct hpcg_system_params { - std::array< size_t, DIMS > physical_sys_sizes; - size_t halo_size; - T diag_value; - T non_diag_value; - size_t min_phys_size; - size_t max_levels; - size_t coarsening_step; - }; - - template< - size_t DIMS, - typename coord_t, - typename T - > void build_hpcg_multigrid_generators( - const hpcg_system_params< DIMS, T > &params, - std::vector< grb::algorithms::HPCGBuilder< DIMS, coord_t, T > > &mg_generators - ) { - static_assert( DIMS > 0, "DIMS must be > 0" ); - - size_t const current_size{ std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, - std::multiplies< size_t >() ) }; - if( current_size > std::numeric_limits< coord_t >::max() ) { - throw std::domain_error( "CoordT cannot store the matrix coordinates" ); - } - size_t min_physical_size { *std::min_element( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend() ) }; - if( min_physical_size < params.min_phys_size ) { - throw std::domain_error( "the initial system is too small" ); - } - - std::array< coord_t, DIMS > coord_sizes; - // type-translate coordinates - std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), coord_sizes.begin() ); - - // generate linked list of hierarchical coarseners - for( size_t coarsening_level = 0UL; - min_physical_size >= params.min_phys_size && coarsening_level <= params.max_levels; - coarsening_level++ ) { - - // build generator - mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, params.non_diag_value ); - - // prepare for new iteration - min_physical_size /= params.coarsening_step; - std::for_each( coord_sizes.begin(), coord_sizes.end(), - [ &params ]( coord_t &v ){ v /= params.coarsening_step; }); - } - } - } // namespace algorithms } // namespace grb diff --git 
a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp index 714555426..34347582e 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -29,7 +29,7 @@ namespace grb { typename MGInfoType, typename CoarsenerInfoType, typename SmootherInfoType - > void allocate_multigrid_data( + > void multigrid_allocate_data( const std::vector< size_t > &mg_sizes, std::vector< std::unique_ptr< MGInfoType > > &system_levels, std::vector< std::unique_ptr< CoarsenerInfoType > > &coarsener_levels, diff --git a/include/graphblas/utils/geometry/array_vector_storage.hpp b/include/graphblas/utils/multigrid/array_vector_storage.hpp similarity index 78% rename from include/graphblas/utils/geometry/array_vector_storage.hpp rename to include/graphblas/utils/multigrid/array_vector_storage.hpp index 45fbab04e..8eb1e4377 100644 --- a/include/graphblas/utils/geometry/array_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/array_vector_storage.hpp @@ -24,8 +24,8 @@ * @date 2022-10-24 */ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE -#define _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE +#define _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE #include #include @@ -34,7 +34,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { /** * Array with fixed size based on std::array with an interface compliant to what other classes @@ -46,14 +46,14 @@ namespace grb { * @tparam DIMS the dimensions of the vector */ template< - typename DataType, - size_t DIMS + size_t DIMS, + typename DataType > class ArrayVectorStorage: public std::array< DataType, DIMS > { - public: using VectorStorageType = std::array< DataType, DIMS >&; using ConstVectorStorageType = const std::array< DataType, DIMS >&; + using SelfType = 
ArrayVectorStorage< DIMS, DataType >; ArrayVectorStorage( size_t _dimensions ) { static_assert( DIMS > 0, "cannot allocate 0-sized array" ); @@ -65,20 +65,20 @@ namespace grb { ArrayVectorStorage() = delete; // only copy constructor/assignment, since there's no external storage - ArrayVectorStorage( const ArrayVectorStorage< DataType, DIMS > &o ) noexcept { + ArrayVectorStorage( const SelfType &o ) noexcept { std::copy_n( o.cbegin(), DIMS, this->begin() ); } - ArrayVectorStorage( ArrayVectorStorage< DataType, DIMS > &&o ) = delete; + ArrayVectorStorage( SelfType &&o ) = delete; - ArrayVectorStorage< DataType, DIMS >& operator=( - const ArrayVectorStorage< DataType, DIMS > &original + SelfType& operator=( + const SelfType &original ) noexcept { std::copy_n( original.begin(), DIMS, this->begin() ); return *this; } - ArrayVectorStorage< DataType, DIMS >& operator=( ArrayVectorStorage< DataType, DIMS > &&original ) = delete; + SelfType & operator=( SelfType &&original ) = delete; constexpr size_t dimensions() const { return DIMS; @@ -93,8 +93,8 @@ namespace grb { } }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_ARRAY_VECTOR_STORAGE +#endif // _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE diff --git a/include/graphblas/utils/geometry/dynamic_vector_storage.hpp b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp similarity index 94% rename from include/graphblas/utils/geometry/dynamic_vector_storage.hpp rename to include/graphblas/utils/multigrid/dynamic_vector_storage.hpp index a0def1980..9168f175c 100644 --- a/include/graphblas/utils/geometry/dynamic_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp @@ -15,8 +15,8 @@ * limitations under the License. 
*/ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE -#define _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE +#define _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE #include #include @@ -32,7 +32,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { /** * Array with fixed size (i.e. decided at object creation) allocated on the heap with an interface compliant @@ -54,7 +54,6 @@ namespace grb { } public: - // iterator fields using reference = DataType&; using const_reference = const DataType&; @@ -147,8 +146,8 @@ namespace grb { } }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_DYNAMIC_VECTOR_STORAGE +#endif // _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE diff --git a/include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp similarity index 89% rename from include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp rename to include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp index 6eb469f21..2bd82ff35 100644 --- a/include/graphblas/utils/geometry/halo_matrix_generator_iterator.hpp +++ b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp @@ -1,6 +1,6 @@ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR -#define _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR #include @@ -10,8 +10,8 @@ #include "array_vector_storage.hpp" namespace grb { - namespace algorithms { - namespace geometry { + namespace utils { + namespace multigrid { template< size_t DIMS, @@ -26,8 +26,7 @@ namespace grb { using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; - - using 
LinearSystemType = grb::utils::geometry::LinearizedHaloNDimSystem< RowIndexType, DIMS >; + using LinearSystemType = LinearizedHaloNDimSystem< DIMS, RowIndexType >; using SelfType = HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, ValueCallable >; using Iterator = typename LinearSystemType::Iterator; @@ -92,12 +91,8 @@ namespace grb { HaloMatrixGeneratorIterator( const SelfType & ) = default; - // HaloMatrixGeneratorIterator( SelfType && ) = default; - SelfType & operator=( const SelfType & ) = default; - // SelfType & operator=( SelfType && ) = default; - /** * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. * @@ -160,14 +155,14 @@ namespace grb { } /** - * @brief Returns current row. + * @brief Returns the current row. */ inline RowIndexType i() const { return _val.i(); } /** - * @brief Returns current column. + * @brief Returns the current column. */ inline ColumnIndexType j() const { return _val.j(); @@ -183,10 +178,6 @@ namespace grb { return _val.v(); } - const Iterator & it() const { - return this->_sys_iter; - } - private: value_type _val; const LinearSystemType *_lin_system; @@ -198,10 +189,8 @@ namespace grb { } }; - - - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_HALO_MATRIX_GENRATOR_ITERATOR +#endif // _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp similarity index 81% rename from include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp rename to include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp index 04928ac09..0e53dd671 100644 --- a/include/graphblas/utils/geometry/linearized_halo_ndim_geometry.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp @@ -1,6 +1,6 @@ -#ifndef 
_H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY -#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY +#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY #include #include @@ -9,6 +9,7 @@ #include #include #include +#include #include "array_vector_storage.hpp" #include "dynamic_vector_storage.hpp" @@ -17,14 +18,17 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { - template< typename CoordType, size_t DIMS > void __compute_neighbors_range( - const ArrayVectorStorage< CoordType, DIMS >& _system_sizes, + template< + size_t DIMS, + typename CoordType + > void __compute_neighbors_range( + const ArrayVectorStorage< DIMS, CoordType > &_system_sizes, const CoordType halo, - const ArrayVectorStorage< CoordType, DIMS >& system_coordinates, - ArrayVectorStorage< CoordType, DIMS >& neighbors_start, - ArrayVectorStorage< CoordType, DIMS >& neighbors_range ) { + const ArrayVectorStorage< DIMS, CoordType > &system_coordinates, + ArrayVectorStorage< DIMS, CoordType > &neighbors_start, + ArrayVectorStorage< DIMS, CoordType > &neighbors_range ) { for( CoordType i{0}; i < DIMS/* - 1*/; i++ ) { const CoordType start{ system_coordinates[i] <= halo ? 0 : system_coordinates[i] - halo }; @@ -32,37 +36,28 @@ namespace grb { neighbors_start[i] = start; neighbors_range[i] = end - start + 1; } - /* - const size_t last{ DIMS - 1 }; - const CoordT start{ system_coordinates[ last ] <= halo ? 
0 : system_coordinates[ last ] - halo }; - const CoordT end{ system_coordinates[ last ] + halo }; // can extend beyond actual DIMS-dimensional space - neighbors_start[ last ] = start; - neighbors_range[ last ] = end - start + 1; - */ } - - - - - - template< typename CoordType, size_t DIMS > size_t __neighbour_to_system_coords( - const std::array< CoordType, DIMS > & sizes, + template< + size_t DIMS, + typename CoordType + > size_t __neighbour_to_system_coords( + const std::array< CoordType, DIMS > &sizes, size_t system_size, - const std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > > & dimension_neighbors, + const std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > > + &dimension_neighbors, CoordType halo, CoordType neighbor, - ArrayVectorStorage< CoordType, DIMS > & result) { - + ArrayVectorStorage< DIMS, CoordType > &result + ){ if( neighbor > system_size ) { throw std::invalid_argument("neighbor number ( " + std::to_string(neighbor) + " ) >= system size ( " + std::to_string( system_size ) + " )"); } - - ArrayVectorStorage< CoordType, DIMS > halo_coords( DIMS ); - #ifdef DBG + ArrayVectorStorage< DIMS, CoordType > halo_coords( DIMS ); +#ifdef _DEBUG size_t * const halo_coords_end{ halo_coords.data() + DIMS }; - #endif +#endif std::fill_n( halo_coords.begin(), DIMS, 0 ); for( size_t _dim{DIMS}; _dim > 0; _dim--) { @@ -72,13 +67,11 @@ namespace grb { const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > & neighbors{ dimension_neighbors[dimension] }; CoordType * const halo_coords_begin{ halo_coords.data() + dimension }; - - #ifdef DBG +#ifdef _DEBUG std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " << neighbor << std::endl; std::cout << "\thalo : "; print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; - #endif - +#endif size_t h{0}; size_t previous_neighs{ 0 }; *halo_coords_begin = h; @@ -90,47 +83,44 @@ namespace grb { previous_neighs += 
halo_max_neighs; halo_max_neighs = neighbors.at( halo_coords_begin ); } - #ifdef DBG +#ifdef _DEBUG std::cout << "- initial halo - neighbour " << neighbor << std::endl; std::cout << "\th " << h << std::endl; std::cout << "\thalo : "; print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; - #endif - - +#endif if ( h < halo ){ result[dimension] = h; neighbor -= previous_neighs; - #ifdef DBG +#ifdef _DEBUG std::cout << "end neighbour " << neighbor << std::endl; - #endif +#endif continue; } // saturation occurred const size_t distance_from_halo{ ( neighbor - previous_neighs ) / halo_max_neighs }; - #ifdef DBG +#ifdef _DEBUG std::cout << "- before middle elements - neighbour " << neighbor << std::endl; std::cout << "\tprevious_neighs " << previous_neighs << std::endl; std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; std::cout << "\tdistance_from_halo " << distance_from_halo << std::endl; std::cout << "\tdimension_size " << dimension_size << std::endl; - #endif +#endif if ( distance_from_halo < dimension_size - 2 * halo ) { result[dimension] = distance_from_halo + halo; neighbor -= (previous_neighs + distance_from_halo * halo_max_neighs) ; - #ifdef DBG +#ifdef _DEBUG std::cout << "end neighbour " << neighbor << std::endl; - #endif +#endif continue; } previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; - #ifdef DBG +#ifdef _DEBUG std::cout << "- after middle elements -neighbour " << neighbor << std::endl; std::cout << "\tprevious_neighs " << previous_neighs << std::endl; std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; - #endif - +#endif h = halo - 1; *halo_coords_begin = h; halo_max_neighs = neighbors.at( halo_coords_begin ); @@ -141,27 +131,27 @@ namespace grb { halo_max_neighs = neighbors.at( halo_coords_begin ); } neighbor -= previous_neighs; - #ifdef DBG +#ifdef _DEBUG std::cout << "- final halo - neighbour " << neighbor << std::endl; 
std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; - #endif +#endif // ( dimension_size - 1 ) because coordinates are 0-based and neighbor // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] result[dimension] = dimension_size - 1 - h; - #ifdef DBG +#ifdef _DEBUG std::cout << "end neighbour " << neighbor << std::endl; - #endif +#endif } - return neighbor; } template< typename CoordType > size_t __accumulate_dimension_neighbours( - const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >& prev_neighs, + const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > &prev_neighs, CoordType* coords_buffer, size_t halo, - size_t local_size ) { + size_t local_size + ) { size_t neighs{0}; size_t h{0}; for( ; h < halo && local_size > 1; h++ ) { @@ -188,17 +178,20 @@ namespace grb { } } - template< typename CoordType, size_t DIMS > size_t __init_halo_search( - typename LinearizedNDimSystem< CoordType, ArrayVectorStorage< CoordType, DIMS > >::ConstVectorReference sizes, + template< + typename CoordType, + size_t DIMS + > size_t __init_halo_search( + typename LinearizedNDimSystem< CoordType, ArrayVectorStorage< DIMS, CoordType > >::ConstVectorReference + sizes, size_t halo, - std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > >& dimension_limits ) { - + std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > >& dimension_limits + ) { using nd_vec = NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >; using nd_vec_iterator = typename nd_vec::DomainIterator; std::vector halo_sizes( DIMS, halo + 1); dimension_limits.emplace_back(halo_sizes); - // initialize values __populate_halo_neighbors< CoordType >( halo, dimension_limits[0] ); for( size_t i{1}; i < DIMS; i++ ) { @@ -226,8 +219,8 @@ namespace grb { return __accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); } - } 
// namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_GEOMETRY +#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp similarity index 63% rename from include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp rename to include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp index 9829fdb46..871d62b7c 100644 --- a/include/graphblas/utils/geometry/linearized_halo_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp @@ -15,8 +15,8 @@ * limitations under the License. */ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR -#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR #include #include @@ -31,27 +31,26 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { // forward declaration template< - typename SizeType, - size_t DIMS + size_t DIMS, + typename SizeType > class LinearizedHaloNDimSystem; template< - typename SizeType, - size_t DIMS + size_t DIMS, + typename SizeType > class LinearizedHaloNDimIterator { - using SystemType = LinearizedHaloNDimSystem< SizeType, DIMS >; - using VectorType = ArrayVectorStorage< SizeType, DIMS >; + using SystemType = LinearizedHaloNDimSystem< DIMS, SizeType >; + using VectorType = ArrayVectorStorage< DIMS, SizeType >; using VectorIteratorType = LinearizedNDimIterator< SizeType, VectorType >; public: - //using VectorType = typename VectorIteratorType::VectorType; using ConstVectorReference = typename VectorIteratorType::ConstVectorReference; - using SelfType = LinearizedHaloNDimIterator< SizeType, DIMS >; + using SelfType = 
LinearizedHaloNDimIterator< DIMS, SizeType >; struct HaloNDimElement { private: @@ -62,10 +61,7 @@ namespace grb { // for iteration VectorIteratorType _element_iter; // coordinates iterator - //VectorType* _element; - //size_t _coordinates_linear; VectorType _neighbor; //the current neighbor - //size_t _neighbor_linear; SizeType _position; public: @@ -88,8 +84,6 @@ namespace grb { HaloNDimElement& operator=( const HaloNDimElement& ) = default; - //HaloNDimElement& operator=( HaloNDimElement&& ) = delete; - ConstVectorReference get_element() const { return this->_element_iter->get_position(); } @@ -127,45 +121,22 @@ namespace grb { inline void __update_neighbor() { for( size_t i{0}; i < DIMS; i++ ) { - //(this->_point)._neighbor[i] = this->_neighbors_start[i] + (*(this->_neighbor_iter))[i]; this->_point._neighbor[i] = this->_neighbors_start[i] + this->_neighbor_iter->get_position()[i]; } } - /* - void __update_neighbor_linear() { - (this->_point)._neighbor_linear = - this->_system.ndim_to_linear( this->_point._neighbor ); - } - */ - inline void on_neighbor_iter_update() { this->__update_neighbor(); - //this->__update_neighbor_linear(); } - /* - void __update_coordinates_linear() { - (this->_point)._coordinates_linear = - this->_system.ndim_to_linear( *this->_element_iter ); - } - */ - void on_element_update() { - //this->__update_coordinates_linear(); // reset everything VectorType neighbors_range( DIMS ); this->_point._system->compute_neighbors_range( - //*(this->_point._element_iter), this->_point._element_iter->get_position(), this->_neighbors_start, neighbors_range ); - /* - std::cout << "\t=== start "; - print( this->_neighbors_start ) << " range "; - print( neighbors_range ) << std::endl; - */ // re-target _neighbors_linearizer this->_neighbors_linearizer.retarget( neighbors_range ); } @@ -193,48 +164,11 @@ namespace grb { std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); } - - /* - LinearizedHaloNDimIterator( const LinearizedHaloNDimIterator< SizeType, 
DIMS >& original ) noexcept: - _coordinates_linearizer( original._coordinates_linearizer ), - _halo( original._halo ), - _dimension_limits( original._dimension_limits ), - _neighbors_linearizer( original._neighbors_linearizer ), - _element_iter( original._element_iter ), - _neighbor_iter( original._neighbor_iter ), - _neighbor_end( original._neighbor_end ), - _neighbors_start( original._neighbors_start ), - _point( original._point ) {} - */ - LinearizedHaloNDimIterator( const SelfType & ) = default; - //LinearizedHaloNDimIterator( SelfType &&original ) = delete; - - /* - LinearizedHaloNDimIterator< SizeType, DIMS >& operator=( - const LinearizedHaloNDimIterator< SizeType, DIMS >& original ) noexcept { - this->_coordinates_linearizer = original._coordinates_linearizer; - this->_halo = original._halo; - this->_dimension_limits = original._dimension_limits; - this->_neighbors_linearizer = original._neighbors_linearizer; - this->_element_iter = original._element_iter; - this->_coordinates_linear = original._coordinates_linear; - this->_neighbor_iter = original._neighbor_iter; - this->_neighbor_end = original._neighbor_end; - this->_neighbor = original._neighbor; - this->_neighbors_start = original._neighbors_start; - this->_neighbor_linear = original._neighbor_linear; - } - */ - SelfType & operator=( const SelfType & ) = default; - //SelfType & operator=( SelfType && ) = delete; - bool operator!=( const SelfType &other ) const { - //return (this->_point)._coordinates_linear != (other._point)._coordinates_linear - // || (this->_point)._neighbor_linear != (other._point)._neighbor_linear; return this->_point._position != other._point._position; // use linear coordinate } @@ -251,12 +185,6 @@ namespace grb { } void next_neighbour() { - /* - std::cout << "sizes: " << this->_neighbors_linearizer.get_sizes() - << " offset " << this->_neighbor_iter->get_position() << " -> " - << this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ) - << 
std::endl; - */ ++(this->_neighbor_iter); this->on_neighbor_iter_update(); this->_point._position++; @@ -269,11 +197,9 @@ namespace grb { void next_element() { size_t num_neighbours = this->_neighbors_linearizer.system_size(); size_t neighbour_position_offset = - this->_neighbors_linearizer.ndim_to_linear_offset( this->_neighbor_iter->get_position() ); - // std::cout << " num_neighbours " << num_neighbours << " offset " << neighbour_position_offset << std::endl; + this->_neighbors_linearizer.ndim_to_linear( this->_neighbor_iter->get_position() ); ++(this->_point._element_iter); this->on_element_advance(); - // this->_point._position++; this->_point._position -= neighbour_position_offset; this->_point._position += num_neighbours; } @@ -282,7 +208,6 @@ namespace grb { ++(this->_neighbor_iter); if( !has_more_neighbours() ) { ++(this->_point._element_iter); - //this->_coordinates_linear = this->_coordinates_linearizer.ndim_to_linear( this->_element_iter ); this->on_element_advance(); } else { @@ -303,11 +228,7 @@ namespace grb { VectorType final_element( DIMS ); size_t neighbor_index{ (this->_point._system->neighbour_linear_to_element( final_position, final_element )) }; - // std::cout << "\t=== element " << offset << " -- "; - // std::cout << final_element[0] << " " << final_element[0] << std::endl; - this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); - //this->_point._element = &( *this->_element_iter ); this->_point._position = final_position; this->on_element_update(); @@ -329,7 +250,7 @@ namespace grb { size_t a_pos{ _point.get_position() }, b_pos{ other._point.get_position() }; // std::cout << "diff " << a_pos << " - " << b_pos << std::endl; size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; - using diff_t = typename LinearizedHaloNDimIterator< SizeType, DIMS >::difference_type; + using diff_t = typename LinearizedHaloNDimIterator< DIMS, SizeType >::difference_type; if( highest - lowest > 
static_cast< size_t >( std::numeric_limits< diff_t >::max() ) ) { @@ -339,47 +260,21 @@ namespace grb { return ( static_cast< diff_t >( a_pos - b_pos ) ); } - - - // implementation depending on logic in operator++ static SelfType make_system_end_iterator( const SystemType& system ) { SelfType result( system ); - /* - std::cout << "result 0: element "; - print(result->get_element()) << " neighbor "; - print(result->get_neighbor()) << std::endl; - */ - // go to the very first point outside of space result._point._element_iter = VectorIteratorType::make_system_end_iterator( system ); - /* - std::cout << "result 1: element "; - print(result->get_element()) << " neighbor "; - print(result->get_neighbor()) << std::endl; - */ - result.on_element_advance(); result._point._position = system.halo_system_size(); - //std::cout << "got sys size " << system.halo_system_size() << std::endl; return result; } - }; - /* - template< typename SizeType, size_t DIMS > LinearizedHaloNDimIterator< SizeType, DIMS > - operator+( const LinearizedHaloNDimIterator< SizeType, DIMS >& original, size_t increment ) { - LinearizedHaloNDimIterator< SizeType, DIMS > res( original ); - return ( res += increment ); - } - */ - - - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_ITERATOR +#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp similarity index 82% rename from include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp rename to include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp index af296cc9f..cc84de621 100644 --- a/include/graphblas/utils/geometry/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp @@ -1,6 +1,6 @@ -#ifndef 
_H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM -#define _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM #include #include @@ -15,21 +15,20 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { // only with ArrayVectorStorage template< - typename SizeType, - size_t DIMS + size_t DIMS, + typename SizeType > class LinearizedHaloNDimSystem: - public LinearizedNDimSystem< SizeType, ArrayVectorStorage< SizeType, DIMS > > { + public LinearizedNDimSystem< SizeType, ArrayVectorStorage< DIMS, SizeType > > { public: - - using VectorType = ArrayVectorStorage< SizeType, DIMS >; + using VectorType = ArrayVectorStorage< DIMS, SizeType >; using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; - using SelfType = LinearizedHaloNDimSystem< SizeType, DIMS >; + using SelfType = LinearizedHaloNDimSystem< DIMS, SizeType >; using BaseType = LinearizedNDimSystem< SizeType, VectorType >; - using Iterator = LinearizedHaloNDimIterator< SizeType, DIMS >; + using Iterator = LinearizedHaloNDimIterator< DIMS, SizeType >; LinearizedHaloNDimSystem( ConstVectorStorageType sizes, SizeType halo ): BaseType( sizes.cbegin(), sizes.cend() ), @@ -102,15 +101,13 @@ namespace grb { } private: - const SizeType _halo; std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > _dimension_limits; size_t _system_size; - }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_LINEARIZED_HALO_NDIM_SYSTEM +#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM diff --git a/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp similarity index 91% rename from include/graphblas/utils/geometry/linearized_ndim_iterator.hpp rename to 
include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp index 60f424164..f65ec8831 100644 --- a/include/graphblas/utils/geometry/linearized_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp @@ -15,8 +15,8 @@ * limitations under the License. */ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR -#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR #include #include @@ -29,7 +29,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { // forward declaration for default template< @@ -42,7 +42,6 @@ namespace grb { typename InternalVectorType > class LinearizedNDimIterator { public: - using VectorType = InternalVectorType; using LinNDimSysType = LinearizedNDimSystem< SizeType, VectorType >; using ConstVectorReference = const VectorType&; @@ -50,12 +49,10 @@ namespace grb { struct NDimPoint { private: - const LinNDimSysType* system; // pointer because of copy assignment VectorType coords; public: - friend SelfType; NDimPoint() = delete; @@ -82,7 +79,6 @@ namespace grb { } }; - // interface for std::random_access_iterator using iterator_category = std::random_access_iterator_tag; using value_type = NDimPoint; @@ -107,10 +103,6 @@ namespace grb { SelfType& operator=( const SelfType &original ) = default; - // LinearizedNDimIterator( SelfType && ) = delete; - - // SelfType operator=( SelfType && ) = delete; - ~LinearizedNDimIterator() {} SelfType & operator++() noexcept { @@ -119,11 +111,6 @@ namespace grb { for( size_t i { 0 }; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { SizeType& coord = this->_p.coords[ i ]; // must rewind dimension if we wrap-around - /* - SizeType new_coord = ( coord + 1 ) % this->_p.system->get_sizes()[ i ]; - rewind = new_coord < coord; - coord = new_coord; - */ SizeType plus = coord + 1; rewind = plus >= this->_p.system->get_sizes()[ i ]; coord = rewind ? 
0 : plus; @@ -187,11 +174,10 @@ namespace grb { private: NDimPoint _p; - }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_ITERATOR +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR diff --git a/include/graphblas/utils/geometry/linearized_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp similarity index 91% rename from include/graphblas/utils/geometry/linearized_ndim_system.hpp rename to include/graphblas/utils/multigrid/linearized_ndim_system.hpp index 87352aa19..3e4c15b14 100644 --- a/include/graphblas/utils/geometry/linearized_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp @@ -15,8 +15,8 @@ * limitations under the License. */ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER -#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER #include #include @@ -41,7 +41,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { /** * Extends a \p NDimSystem by linearizing it, i.e. it provides facilities to map a vector in @@ -60,7 +60,6 @@ namespace grb { typename SizeType, typename InternalVectorType > class LinearizedNDimSystem: public NDimSystem< SizeType, InternalVectorType > { - public: static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); @@ -160,17 +159,6 @@ namespace grb { return linear; } - // probably same as ndim_to_linear !!! 
- size_t ndim_to_linear_offset( ConstVectorStorageType ndim_vector ) const { - size_t linear{ 0 }; - size_t steps{ 1 }; - for( size_t i{ 0 }; i < this->dimensions(); i++ ) { - linear += steps * ndim_vector[i]; - steps *= this->_sizes[i]; - } - return linear; - } - // must be same dimensionality void retarget( ConstVectorReference _new_sizes ) { if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { @@ -191,11 +179,10 @@ namespace grb { } private: - VectorType offsets; size_t _system_size; - template< + template< typename IterIn, typename IterOut > static size_t compute_offsets( IterIn in_begin, IterIn in_end, IterOut out_begin ) { @@ -208,9 +195,8 @@ namespace grb { } }; - - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM_LINEARIZER +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER diff --git a/include/graphblas/utils/geometry/ndim_system.hpp b/include/graphblas/utils/multigrid/ndim_system.hpp similarity index 88% rename from include/graphblas/utils/geometry/ndim_system.hpp rename to include/graphblas/utils/multigrid/ndim_system.hpp index f9a97c18d..9d387ce32 100644 --- a/include/graphblas/utils/geometry/ndim_system.hpp +++ b/include/graphblas/utils/multigrid/ndim_system.hpp @@ -15,8 +15,8 @@ * limitations under the License. */ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM -#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM #include #include @@ -37,7 +37,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { /** * Describes a #dimensions()-dimensional system by storing its size along each dimension. 
@@ -52,7 +52,6 @@ namespace grb { typename SizeType, typename InternalVectorType > class NDimSystem { - public: static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); @@ -71,7 +70,7 @@ namespace grb { * @param begin range begin * @param end end of range */ - template< typename IterT > NDimSystem( IterT begin, IterT end) noexcept : + template< typename IterType > NDimSystem( IterType begin, IterType end) noexcept : _sizes( std::distance( begin, end ) ) { std::copy( begin, end, this->_sizes.begin() ); @@ -98,13 +97,8 @@ namespace grb { NDimSystem( const SelfType & ) = default; - // NDimSystem( SelfType && ) = default; - - // NDimSystem( SelfType &&original ) noexcept: _sizes( std::move( original._sizes ) ) {} NDimSystem( SelfType && ) = delete; - ~NDimSystem() {} - SelfType & operator=( const SelfType &original ) = default; SelfType & operator=( SelfType &&original ) = delete; @@ -122,12 +116,11 @@ namespace grb { } protected: - InternalVectorType _sizes; }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_SYSTEM +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM diff --git a/include/graphblas/utils/geometry/ndim_vector.hpp b/include/graphblas/utils/multigrid/ndim_vector.hpp similarity index 95% rename from include/graphblas/utils/geometry/ndim_vector.hpp rename to include/graphblas/utils/multigrid/ndim_vector.hpp index eca89137e..26ee084e6 100644 --- a/include/graphblas/utils/geometry/ndim_vector.hpp +++ b/include/graphblas/utils/multigrid/ndim_vector.hpp @@ -1,6 +1,6 @@ -#ifndef _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR -#define _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR +#define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR #include #include @@ -12,7 +12,7 @@ namespace grb { namespace utils { - namespace geometry { + namespace multigrid { /** * Maps an N-dimensional vector to an array of data. 
@@ -37,7 +37,6 @@ namespace grb { typename SizeType, typename InternalVectorType > class NDimVector { - public: static_assert( std::is_default_constructible< DataType >::value, "the stored type is not default constructible" ); @@ -133,8 +132,8 @@ namespace grb { } }; - } // namespace geometry + } // namespace multigrid } // namespace utils } // namespace grb -#endif // _H_GRB_ALGORITHMS_GEOMETRY_NDIM_VECTOR +#endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 2b544fb16..ebed53096 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -43,7 +43,6 @@ // here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests #ifdef HPCG_PRINT_STEPS - #include // HPCG_PRINT_STEPS requires defining the following symbols @@ -52,10 +51,8 @@ * @brief simply prints \p args on a dedicated line. */ #define DBG_println( args ) std::cout << args << std::endl; - // forward declaration for the tracing facility template< typename T > void print_norm( const grb::Vector< T > &r, const char * head ); - /** * @brief prints \p head and the norm of \p r. */ @@ -86,9 +83,8 @@ constexpr double SYSTEM_DIAG_VALUE { 26.0 }; constexpr double SYSTEM_NON_DIAG_VALUE { -1.0 }; constexpr size_t BAND_WIDTH_3D { 13UL }; constexpr size_t HALO_RADIUS { 1U }; -//============================================ - constexpr double MAX_NORM { 4.0e-14 }; +//============================================ using namespace grb; using namespace algorithms; @@ -110,7 +106,7 @@ struct system_input { * Container for the parameters for the HPCG simulation. */ struct simulation_input : public system_input { - size_t test_repetitions; + size_t inner_test_repetitions; size_t max_iterations; size_t smoother_steps; bool evaluation_run; @@ -128,20 +124,65 @@ using StdMinus = operators::subtract< NonzeroType >; using coord_t = size_t; /** - * Containers for test outputs. + * Container for test outputs. 
*/ struct output { RC error_code = SUCCESS; - size_t test_repetitions = 0; - size_t performed_iterations = 0; - NonzeroType residual = 0.0; + size_t inner_test_repetitions = 0; grb::utils::TimerResults times; std::unique_ptr< PinnedVector< IOType > > pinnedVector; - NonzeroType square_norm_diff; + NonzeroType square_norm_diff = 0.0; + cg_out_data< NonzeroType > cg_out = { 0, 0.0 }; }; +using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, + StdRing, StdMinus >; +using mg_data_t = multigrid_data< IOType, NonzeroType >; +using coarsening_data_t = coarsening_data< IOType, NonzeroType >; +using smoothing_data_t = smoother_data< IOType >; +using hpcg_data_t = mg_cg_data< IOType, NonzeroType, InputType >; + +#ifdef HPCG_PRINT_SYSTEM +static void print_system( + const std::vector< std::unique_ptr< mg_data_t > > &system_levels, + const std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels +) { + assert( spmd<>::nprocs() == 1 ); // distributed printin of system not implemented + print_matrix( system_levels[ 0 ]->A, 70, "A" ); + for( size_t i = 0; i < coarsener_levels.size(); i++ ) { + print_matrix( coarsener_levels[i ] ->coarsening_matrix, 50, "COARSENING MATRIX" ); + print_matrix( system_levels[ i + 1 ]->A, 50, "COARSER SYSTEM MATRIX" ); + } +} +#endif + +#ifdef HPCG_PRINT_STEPS +template< + typename T, + class Ring +> void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { + T norm = 0; + RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; + (void)ret; + assert( ret == SUCCESS ); + if( spmd<>::pid() != 0 ) { + return; + } + if( head != nullptr ) { + printf(">>> %s: %lf\n", head, norm ); + } else { + printf(">>> %lf\n", norm ); + } +} + +template< typename T > void print_norm( const grb::Vector< T > & r, const char * head ) { + return print_norm( r, head, StdRing() ); +} +#endif + + /** - * Returns the closets power of 2 bigger or equal to \p n . 
+ * Returns the closest power of 2 bigger or equal to \p n . */ template< typename T > T static next_pow_2( T n ) { @@ -155,16 +196,41 @@ T static next_pow_2( T n ) { return n + 1; } -using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, - StdRing, StdMinus >; -using mg_data_t = multigrid_data< IOType, NonzeroType >; -using coarsening_data_t = coarsening_data< IOType, NonzeroType >; -using smoothing_data_t = smoother_data< IOType >; -using hpcg_data_t = mg_cg_data< IOType, NonzeroType, InputType >; +static void allocate_system( + const std::vector< size_t > &mg_sizes, + std::vector< std::unique_ptr< mg_data_t > > &system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, + std::unique_ptr< hpcg_data_t > &holder +) { + const size_t pid { spmd<>::pid() }; + grb::utils::Timer timer; + + hpcg_data_t *data{ new hpcg_data_t( mg_sizes[ 0 ] ) }; + holder = std::unique_ptr< hpcg_data_t >( data ); + MASTER_PRINT( pid, "allocating data for the MultiGrid simulation..."); + timer.reset(); + multigrid_allocate_data( mg_sizes, system_levels, coarsener_levels, smoother_levels ); + double time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + + // zero all vectors + MASTER_PRINT( pid, "zeroing all vectors..."); + timer.reset(); + grb::RC rc = data->zero_temp_vectors(); + ASSERT_RC_SUCCESS( rc ); + std::for_each( system_levels.begin(), system_levels.end(), + []( std::unique_ptr< mg_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + std::for_each( coarsener_levels.begin(), coarsener_levels.end(), + []( std::unique_ptr< coarsening_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + std::for_each( smoother_levels.begin(), smoother_levels.end(), + []( std::unique_ptr< smoothing_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + time = timer.time(); + MASTER_PRINT( pid, " time (ms) " << 
time << std::endl ); +} /** * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. - * @return RC grb::SUCCESS if the system initialization within GraphBLAS succeeded */ static void build_3d_system( const system_input & in, @@ -174,11 +240,11 @@ static void build_3d_system( std::unique_ptr< hpcg_data_t > &holder ) { constexpr size_t DIMS = 3; - using builder_t = grb::algorithms::HPCGBuilder< DIMS, coord_t, NonzeroType >; + using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; const size_t pid { spmd<>::pid() }; grb::utils::Timer timer; - hpcg_system_params< 3, NonzeroType > params { + hpcg_system_params< DIMS, NonzeroType > params { { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; @@ -187,39 +253,18 @@ static void build_3d_system( MASTER_PRINT( pid, "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..." 
); timer.reset(); - build_hpcg_multigrid_generators( params, mg_generators ); + hpcg_build_multigrid_generators( params, mg_generators ); double time = timer.time(); MASTER_PRINT( pid, " time (ms) " << time << std::endl ); MASTER_PRINT( pid, "built HPCG generators for " << mg_generators.size() << " levels" << std::endl ); - hpcg_data_t *data{ new hpcg_data_t( mg_generators[ 0 ].system_size() ) }; - holder = std::unique_ptr< hpcg_data_t >( data ); std::vector< size_t > mg_sizes; // exclude main system std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), [] ( const builder_t &b ) { return b.system_size(); } ); - - MASTER_PRINT( pid, "allocating data for the MultiGrid simulation..."); - timer.reset(); - allocate_multigrid_data( mg_sizes, system_levels, coarsener_levels, smoother_levels ); - time = timer.time(); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) - - // zero all vectors - MASTER_PRINT( pid, "zeroing all vectors..."); - timer.reset(); - data->zero_temp_vectors(); - std::for_each( system_levels.begin(), system_levels.end(), - []( std::unique_ptr< mg_data_t > &s) { s->zero_temp_vectors(); } ); - std::for_each( coarsener_levels.begin(), coarsener_levels.end(), - []( std::unique_ptr< coarsening_data_t > &s) { s->zero_temp_vectors(); } ); - std::for_each( smoother_levels.begin(), smoother_levels.end(), - []( std::unique_ptr< smoothing_data_t > &s) { s->zero_temp_vectors(); } ); - time = timer.time(); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) - + allocate_system( mg_sizes, system_levels, coarsener_levels, smoother_levels, holder ); assert( mg_generators.size() == system_levels.size() ); assert( mg_generators.size() == smoother_levels.size() ); assert( mg_generators.size() - 1 == coarsener_levels.size() ); @@ -228,63 +273,29 @@ static void build_3d_system( MASTER_PRINT( pid, "SYSTEM LEVEL " << i << std::endl ); MASTER_PRINT( pid, " populating system matrix: " ); timer.reset(); - 
populate_system_matrix( mg_generators[ i ], system_levels.at(i)->A ); + grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], system_levels.at(i)->A ); time = timer.time(); + ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, " time (ms) " << time << std::endl ) MASTER_PRINT( pid, " populating smoothing data: " ); timer.reset(); - populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ] ); + rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ] ); time = timer.time(); + ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, " time (ms) " << time << std::endl ) if( i > 0 ) { MASTER_PRINT( pid, " populating coarsening data: " ); timer.reset(); - populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); time = timer.time(); + ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, " time (ms) " << time << std::endl ) } } } -#ifdef HPCG_PRINT_SYSTEM -static void print_system( - const std::vector< std::unique_ptr< mg_data_t > > &system_levels, - const std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels -) { - print_matrix( system_levels[ 0 ]->A, 70, "A" ); - for( size_t i = 0; i < coarsener_levels.size(); i++ ) { - print_matrix( coarsener_levels[i ] ->coarsening_matrix, 50, "COARSENING MATRIX" ); - print_matrix( system_levels[ i + 1 ]->A, 50, "COARSER SYSTEM MATRIX" ); - } -} -#endif - -#ifdef HPCG_PRINT_STEPS -template< - typename T, - class Ring -> void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { - T norm = 0; - RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; - (void)ret; - assert( ret == SUCCESS ); - if( head != nullptr ) { - printf(">>> %s: %lf\n", head, norm ); - } else { - printf(">>> %lf\n", norm ); - } -} - -template< typename T > void print_norm( const grb::Vector< T > & r, const char * head ) { - return print_norm( r, head, StdRing() ); -} -#endif 
- - - - /** * @brief Main test, building an HPCG problem and running the simulation closely following the * parameters in the reference HPCG test. @@ -292,14 +303,9 @@ template< typename T > void print_norm( const grb::Vector< T > & r, const char * void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID const size_t pid { spmd<>::pid() }; - assert( pid < spmd<>::nprocs() ); - if( pid == 0 ) { - thcout << "beginning input generation..." << std::endl; - } - grb::utils::Timer timer; + MASTER_PRINT( pid, "beginning input generation..." << std::endl ); - // assume successful run - out.error_code = SUCCESS; + grb::utils::Timer timer; // wrap hpcg_data inside a unique_ptr to forget about cleaning chores std::unique_ptr< hpcg_data_t > hpcg_state; @@ -316,10 +322,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { timer.reset(); build_3d_system( in, mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state ); double input_duration { timer.time() }; - - if( pid == 0 ) { - thcout << "input generation time (ms): " << input_duration << std::endl; - } + MASTER_PRINT( pid, "input generation time (ms): " << input_duration << std::endl ); #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { @@ -347,88 +350,55 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.preamble = timer.time(); - cg_out_data< NonzeroType > cg_out; + cg_out_data< NonzeroType > &cg_out = out.cg_out; mg_data_t &grid_base = *mg_runner.system_levels[ 0 ]; - if( in.evaluation_run ) { - out.test_repetitions = 0; - if( pid == 0 ) { - thcout << "beginning evaluation run..." 
<< std::endl; - } - timer.reset(); - rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); - double single_time = timer.time(); - if( rc == SUCCESS ) { - rc = collectives<>::reduce( single_time, 0, operators::max< double >() ); - } - if( rc != SUCCESS ) { - thcerr << "error during evaluation run" << std::endl; - out.error_code = rc; - return; - } - out.times.useful = single_time; - out.test_repetitions = static_cast< size_t >( 1000.0 / single_time ) + 1; - out.performed_iterations = cg_out.iterations; - out.residual = cg_out.norm_residual; - - if( pid == 0 ) { - thcout << "Evaluation run" << std::endl; - } - - std::cout << " iterations: " << out.performed_iterations << std::endl - << " computed residual: " << out.residual << std::endl - << " time taken (ms): " << out.times.useful << std::endl - << " deduced inner repetitions for 1s duration: " << out.test_repetitions << std::endl; - return; - } // do a cold run to warm the system up - if( pid == 0 ) { - thcout << "beginning cold run..." << std::endl; - } + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning cold run..." << std::endl ); hpcg_runner.cg_opts.max_iterations = 1; timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); double iter_duration { timer.time() }; - if( pid == 0 ) { - thcout << "cold run duration (ms): " << iter_duration << std::endl; - } - + ASSERT_RC_SUCCESS( rc ); + MASTER_PRINT( pid, " time (ms): " << iter_duration << std::endl ); hpcg_runner.cg_opts.max_iterations = in.max_iterations; hpcg_runner.cg_opts.print_iter_stats = in.print_iter_stats; // do benchmark - for( size_t i = 0; i < in.test_repetitions && rc == SUCCESS; ++i ) { + const size_t inner_test_repetitions = in.evaluation_run ? 1 : in.inner_test_repetitions; + if( in.evaluation_run ) { + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning evaluation run..." << std::endl ); + } else { + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning test run..." 
<< std::endl ); + } + out.inner_test_repetitions = 0; + out.times.useful = 0.0; + for( size_t i = 0; i < inner_test_repetitions; ++i ) { rc = set( x, 0.0 ); - assert( rc == SUCCESS ); - if( pid == 0 ) { - thcout << "beginning iteration: " << i << std::endl; - } + ASSERT_RC_SUCCESS( rc ); + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl ); timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); - iter_duration = timer.time(); - out.times.useful += iter_duration; - if( pid == 0 ) { - thcout << "repetition,duration (ms): " << i << "," << iter_duration << std::endl; - } - out.test_repetitions++; - if( rc != SUCCESS ) { - break; - } + out.times.useful += timer.time(); + ASSERT_RC_SUCCESS( rc ); + MASTER_PRINT( pid, "repetition,duration (ms): " << i << "," << iter_duration << std::endl ); + out.inner_test_repetitions++; } - out.times.useful /= static_cast< double >( in.test_repetitions ); - - out.performed_iterations = cg_out.iterations; - out.residual = cg_out.norm_residual; - - if( spmd<>::pid() == 0 ) { - if( rc == SUCCESS ) { - thcout << "repetitions, average time (ms): " << out.test_repetitions - << ", " << out.times.useful << std::endl; - } else { - thcerr << "Failure: call to HPCG did not succeed (" << toString( rc ) - << ")." 
<< std::endl; - } + if( in.evaluation_run ) { + rc = collectives<>::reduce( iter_duration, 0, operators::max< double >() ); + ASSERT_RC_SUCCESS( rc ); + out.inner_test_repetitions = static_cast< size_t >( 1000.0 / out.times.useful ) + 1; + MASTER_PRINT( pid, "Evaluation run" << std::endl + << " computed residual: " << cg_out.norm_residual << std::endl + << " iterations: " << cg_out.iterations << std::endl + << " time taken (ms): " << out.times.useful << std::endl + << " deduced inner repetitions for 1s duration: " << out.inner_test_repetitions << std::endl ); + return; } + out.times.useful /= static_cast< double >( in.inner_test_repetitions ); + + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions + << ", " << out.times.useful << std::endl ); // start postamble timer.reset(); @@ -436,15 +406,14 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.error_code = rc; grb::set( b, 1.0 ); - out.square_norm_diff = 0.0; grb::eWiseMul( b, -1.0, x, StdRing() ); + out.square_norm_diff = 0.0; grb::dot( out.square_norm_diff, b, b, StdRing() ); // output - out.pinnedVector = std::unique_ptr< PinnedVector< NonzeroType > >( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); + out.pinnedVector.reset( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); // finish timing - const double time_taken { timer.time() }; - out.times.postamble = time_taken; + out.times.postamble = timer.time(); } /** @@ -455,21 +424,21 @@ static void parse_arguments( simulation_input &, size_t &, double &, int, char * int main( int argc, char ** argv ) { simulation_input sim_in; size_t test_outer_iterations; - double max_residual_norm; + double max_diff_norm; - parse_arguments( sim_in, test_outer_iterations, max_residual_norm, argc, argv ); + parse_arguments( sim_in, test_outer_iterations, max_diff_norm, argc, argv ); thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; thcout << 
"System size z: " << sim_in.nz << std::endl; thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; - thcout << "Test repetitions: " << sim_in.test_repetitions << std::endl; + thcout << "Test repetitions: " << sim_in.inner_test_repetitions << std::endl; thcout << "Max iterations: " << sim_in.max_iterations << std::endl; thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; thcout << "Print iteration residual: " << std::boolalpha << sim_in.print_iter_stats << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; - thcout << "Maximum norm for residual: " << max_residual_norm << std::endl; + thcout << "Maximum norm for residual: " << max_diff_norm << std::endl; // the output struct struct output out; @@ -482,7 +451,7 @@ int main( int argc, char ** argv ) { grb::Launcher< AUTOMATIC > launcher; rc = launcher.exec( &grbProgram, sim_in, out, true ); if( rc == SUCCESS ) { - sim_in.test_repetitions = out.test_repetitions; + sim_in.inner_test_repetitions = out.inner_test_repetitions; } else { thcout << "launcher.exec returns with non-SUCCESS error code " << grb::toString( rc ) << std::endl; std::exit( -1 ); @@ -493,8 +462,8 @@ int main( int argc, char ** argv ) { grb::Benchmarker< AUTOMATIC > benchmarker; rc = benchmarker.exec( &grbProgram, sim_in, out, 1, test_outer_iterations, true ); ASSERT_RC_SUCCESS( rc ); - thcout << "Benchmark completed successfully and took " << out.performed_iterations - << " iterations to converge with residual " << out.residual << std::endl; + thcout << "Benchmark completed successfully and took " << out.cg_out.iterations + << " iterations to converge with residual " << out.cg_out.norm_residual << std::endl; if( ! 
out.pinnedVector ) { thcerr << "no output vector to inspect" << std::endl; @@ -510,37 +479,41 @@ int main( int argc, char ** argv ) { ASSERT_RC_SUCCESS( out.error_code ); - double residual_norm { sqrt( out.square_norm_diff ) }; - thcout << "Residual norm: " << residual_norm << std::endl; + double diff_norm { sqrt( out.square_norm_diff ) }; + thcout << "Norm of difference vector | - |: " << diff_norm << std::endl; - ASSERT_LT( residual_norm, max_residual_norm ); + ASSERT_LT( diff_norm, max_diff_norm ); thcout << "Test OK" << std::endl; return 0; } -static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, double & max_residual_norm, int argc, char ** argv ) { +static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, double & max_diff_norm, int argc, char ** argv ) { argument_parser parser; parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) .add_optional_argument( "--nz", sim_in.nz, PHYS_SYSTEM_SIZE_DEF, "physical system size along z" ) .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, - "maximum level for coarsening; 0 means no coarsening; note: actual " - "level may be limited" + "maximum level for coarsening; 0 means no coarsening; note: actual level may be limited" " by the minimum system dimension" ) - .add_optional_argument( "--test-rep", sim_in.test_repetitions, grb::config::BENCHMARKING::inner(), "consecutive test repetitions before benchmarking" ) - .add_optional_argument( "--init-iter", outer_iterations, grb::config::BENCHMARKING::outer(), "test repetitions with complete initialization" ) - .add_optional_argument( "--max-iter", sim_in.max_iterations, MAX_ITERATIONS_DEF, "maximum number of HPCG iterations" ) - .add_optional_argument( "--max-residual-norm", max_residual_norm, MAX_NORM, - "maximum norm for the 
residual to be acceptable (does NOT limit " + .add_optional_argument( "--test-rep", sim_in.inner_test_repetitions, grb::config::BENCHMARKING::inner(), + "consecutive test repetitions before benchmarking" ) + .add_optional_argument( "--outer-iterations", outer_iterations, 1, + "test repetitions with complete initialization" ) + .add_optional_argument( "--max-cg-iterations", sim_in.max_iterations, MAX_ITERATIONS_DEF, + "maximum number of CG iterations" ) + .add_optional_argument( "--max-difference-norm", max_diff_norm, MAX_NORM, + "maximum acceptable norm | - | (does NOT limit " "the execution of the algorithm)" ) - .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, "number of pre/post-smoother steps; 0 disables smoothing" ) + .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, + "number of pre/post-smoother steps; 0 disables smoothing" ) .add_option( "--evaluation-run", sim_in.evaluation_run, false, "launch single run directly, without benchmarker (ignore repetitions)" ) - .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) - .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, "on each iteration, print more statistics" ); - + .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, + "do not apply pre-conditioning via multi-grid V cycle" ) + .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, + "on each iteration, print more statistics" ); parser.parse( argc, argv ); @@ -564,7 +537,7 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration std::cout << "Setting max coarsening level to " << MAX_COARSENING_LEVELS << " instead of " << sim_in.max_coarsening_levels << std::endl; sim_in.max_coarsening_levels = MAX_COARSENING_LEVELS; } - if( sim_in.test_repetitions == 0 ) { + if( sim_in.inner_test_repetitions == 0 ) { std::cerr << "ERROR no test runs 
selected: set \"--test-rep >0\"" << std::endl; std::exit( -1 ); } From d0a9322bd7bc0cc2652aec72abcd680b22e64f61 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 5 Dec 2022 17:48:53 +0100 Subject: [PATCH 09/28] fixing default number of coarsening levels --- tests/smoke/hpcg.cpp | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index ebed53096..988e181f0 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -70,20 +70,20 @@ template< typename T > void print_norm( const grb::Vector< T > &r, const char * #include //========== MAIN PROBLEM PARAMETERS ========= -// values modifiable via cmd line args: default set as in reference HPCG -constexpr size_t PHYS_SYSTEM_SIZE_DEF{ 16UL }; -constexpr size_t PHYS_SYSTEM_SIZE_MIN{ 4UL }; -constexpr size_t DEF_COARSENING_LEVELS{ 1U }; -constexpr size_t MAX_COARSENING_LEVELS{ 4U }; -constexpr size_t MAX_ITERATIONS_DEF{ 56UL }; -constexpr size_t SMOOTHER_STEPS_DEF{ 1 }; - -// internal values -constexpr double SYSTEM_DIAG_VALUE { 26.0 }; -constexpr double SYSTEM_NON_DIAG_VALUE { -1.0 }; -constexpr size_t BAND_WIDTH_3D { 13UL }; -constexpr size_t HALO_RADIUS { 1U }; -constexpr double MAX_NORM { 4.0e-14 }; +// default simulation parameters, set as in reference HPCG +// users can input different ones via the cmd line +constexpr size_t PHYS_SYSTEM_SIZE_DEF = 16UL; +constexpr size_t PHYS_SYSTEM_SIZE_MIN = 2UL; +constexpr size_t MAX_COARSENING_LEVELS = 3U; +constexpr size_t MAX_ITERATIONS_DEF = 56UL; +constexpr size_t SMOOTHER_STEPS_DEF = 1; + +// internal values defining the simulated physical system +constexpr double SYSTEM_DIAG_VALUE = 26.0; +constexpr double SYSTEM_NON_DIAG_VALUE = -1.0; +constexpr size_t BAND_WIDTH_3D = 13UL; +constexpr size_t HALO_RADIUS = 1U; +constexpr double MAX_NORM = 4.0e-14; //============================================ using namespace grb; @@ -494,7 +494,7 @@ static void parse_arguments( 
simulation_input & sim_in, size_t & outer_iteration parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) .add_optional_argument( "--nz", sim_in.nz, PHYS_SYSTEM_SIZE_DEF, "physical system size along z" ) - .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, DEF_COARSENING_LEVELS, + .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, MAX_COARSENING_LEVELS, "maximum level for coarsening; 0 means no coarsening; note: actual level may be limited" " by the minimum system dimension" ) .add_optional_argument( "--test-rep", sim_in.inner_test_repetitions, grb::config::BENCHMARKING::inner(), From 38cfbf8cde61f05d640afd4257322005fd2c50c1 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 23 Jan 2023 16:39:44 +0100 Subject: [PATCH 10/28] accepting non-power-of-2 system sizes --- tests/smoke/hpcg.cpp | 73 +++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 988e181f0..8d524ef92 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -182,21 +182,14 @@ template< typename T > void print_norm( const grb::Vector< T > & r, const char * /** - * Returns the closest power of 2 bigger or equal to \p n . + * Allocates the data structure input to the various simulation steps (CG, multi-grid, coarsening, smoothing) + * for each level of the multi-grid. The input is the vector of system sizes \p mg_sizes, with sizes in + * monotonically \b decreasing order (finest system first). + * + * This routine is algorithm-agnostic, as long as the constructors of the data types meet the requirements + * explained in \ref multigrid_allocate_data(). */ -template< typename T > -T static next_pow_2( T n ) { - static_assert( std::is_integral< T >::value, "Integral required."
); - --n; - n |= ( n >> 1 ); - for( unsigned i = 1; i <= sizeof( T ) * 4; i *= 2 ) { - const unsigned shift = static_cast< T >( 1U ) << i; - n |= ( n >> shift ); - } - return n + 1; -} - -static void allocate_system( +static void allocate_system_structures( const std::vector< size_t > &mg_sizes, std::vector< std::unique_ptr< mg_data_t > > &system_levels, std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, @@ -264,7 +257,7 @@ static void build_3d_system( // exclude main system std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), [] ( const builder_t &b ) { return b.system_size(); } ); - allocate_system( mg_sizes, system_levels, coarsener_levels, smoother_levels, holder ); + allocate_system_structures( mg_sizes, system_levels, coarsener_levels, smoother_levels, holder ); assert( mg_generators.size() == system_levels.size() ); assert( mg_generators.size() == smoother_levels.size() ); assert( mg_generators.size() - 1 == coarsener_levels.size() ); @@ -488,7 +481,13 @@ int main( int argc, char ** argv ) { return 0; } -static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, double & max_diff_norm, int argc, char ** argv ) { +static void parse_arguments( + simulation_input & sim_in, + size_t & outer_iterations, + double & max_diff_norm, + int argc, + char ** argv +) { argument_parser parser; parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) @@ -517,22 +516,6 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration parser.parse( argc, argv ); - // check for valid values - size_t ssize { std::max( next_pow_2( sim_in.nx ), PHYS_SYSTEM_SIZE_MIN ) }; - if( ssize != sim_in.nx ) { - std::cout << "Setting system size x to " << ssize << " instead of " << sim_in.nx << std::endl; - sim_in.nx = ssize; - } - ssize = std::max( next_pow_2( sim_in.ny ), PHYS_SYSTEM_SIZE_MIN ); - if( ssize != sim_in.ny ) { - std::cout << 
"Setting system size y to " << ssize << " instead of " << sim_in.ny << std::endl; - sim_in.ny = ssize; - } - ssize = std::max( next_pow_2( sim_in.nz ), PHYS_SYSTEM_SIZE_MIN ); - if( ssize != sim_in.nz ) { - std::cout << "Setting system size z to " << ssize << " instead of " << sim_in.nz << std::endl; - sim_in.nz = ssize; - } if( sim_in.max_coarsening_levels > MAX_COARSENING_LEVELS ) { std::cout << "Setting max coarsening level to " << MAX_COARSENING_LEVELS << " instead of " << sim_in.max_coarsening_levels << std::endl; sim_in.max_coarsening_levels = MAX_COARSENING_LEVELS; @@ -542,7 +525,29 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration std::exit( -1 ); } if( sim_in.max_iterations == 0 ) { - std::cout << "Setting number of iterations to 1" << std::endl; - sim_in.max_iterations = 1; + std::cerr << "ERROR no CG iterations selected: set \"--max-cg-iterations > 0\"" << std::endl; + std::exit( -1 ); + } + + const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; + std::cout << "max_system_divider " << max_system_divider << std::endl; + for( size_t s : { sim_in.nx, sim_in.ny, sim_in.nz } ) { + std::cout << "trying " << s << std::endl; + std::lldiv_t div_res = std::div( static_cast< long long >( s ), static_cast< long long >( max_system_divider ) ); + if ( div_res.rem != 0) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " + << sim_in.max_coarsening_levels << " times because it is not exactly divisible" << std::endl; + std::exit( -1 ); + } + std::cout << "div_res.quot " << div_res.quot << std::endl; + if ( div_res.quot < static_cast< long long >( PHYS_SYSTEM_SIZE_MIN ) ) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " + << sim_in.max_coarsening_levels << " times because it is too small" << std::endl; + std::exit( -1 ); + } + if ( div_res.quot % 2 != 0 ) { + std::cerr << "ERROR: the coarsest size " << div_res.rem << " is not a multiple of 2" << std::endl; + std::exit( -1 ); + } } } 
From b971c8998451f9892c8ea1a350b6c39c6d0919d5 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 23 Jan 2023 17:03:49 +0100 Subject: [PATCH 11/28] error if too many coarsening levels --- tests/smoke/hpcg.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 8d524ef92..3e318b0cb 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -517,8 +517,9 @@ static void parse_arguments( parser.parse( argc, argv ); if( sim_in.max_coarsening_levels > MAX_COARSENING_LEVELS ) { - std::cout << "Setting max coarsening level to " << MAX_COARSENING_LEVELS << " instead of " << sim_in.max_coarsening_levels << std::endl; - sim_in.max_coarsening_levels = MAX_COARSENING_LEVELS; + std::cerr << "ERROR: max coarsening level is " << sim_in.max_coarsening_levels << + "; at most " << MAX_COARSENING_LEVELS << " is allowed" << std::endl; + std::exit( -1 ); } if( sim_in.inner_test_repetitions == 0 ) { std::cerr << "ERROR no test runs selected: set \"--test-rep >0\"" << std::endl; @@ -530,16 +531,13 @@ static void parse_arguments( } const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; - std::cout << "max_system_divider " << max_system_divider << std::endl; for( size_t s : { sim_in.nx, sim_in.ny, sim_in.nz } ) { - std::cout << "trying " << s << std::endl; std::lldiv_t div_res = std::div( static_cast< long long >( s ), static_cast< long long >( max_system_divider ) ); if ( div_res.rem != 0) { std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels << " times because it is not exactly divisible" << std::endl; std::exit( -1 ); } - std::cout << "div_res.quot " << div_res.quot << std::endl; if ( div_res.quot < static_cast< long long >( PHYS_SYSTEM_SIZE_MIN ) ) { std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels << " times because it is too small" << std::endl; From b2aa0d50788e88cfa55e7c4fbe9f286aaabc86bd Mon 
Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 18 Nov 2022 14:53:06 +0100 Subject: [PATCH 12/28] more documentation in utilities and code cleanups: simpler logic in benchmark and renaming of MG and HPCG data structures documenting facilities for multigrid and HPCG algorithms and multigrid utils --- .../{coloring.hpp => greedy_coloring.hpp} | 20 +- include/graphblas/algorithms/hpcg/hpcg.hpp | 40 +- ...builder.hpp => single_point_coarsener.hpp} | 145 +++--- .../algorithms/hpcg/system_builder.hpp | 50 +- .../algorithms/hpcg/system_building_utils.hpp | 180 +++++-- .../multigrid/multigrid_building_utils.hpp | 41 ++ .../algorithms/multigrid/multigrid_cg.hpp | 248 ++++----- .../algorithms/multigrid/multigrid_data.hpp | 68 +-- .../multigrid/multigrid_v_cycle.hpp | 135 ++--- .../multigrid/red_black_gauss_seidel.hpp | 164 +++--- ...rsener.hpp => single_matrix_coarsener.hpp} | 86 ++-- .../utils/iterators/IteratorValueAdaptor.hpp | 61 ++- .../utils/iterators/partition_range.hpp | 73 ++- include/graphblas/utils/iterators/utils.hpp | 24 + .../utils/multigrid/array_vector_storage.hpp | 28 +- .../multigrid/dynamic_vector_storage.hpp | 14 +- .../halo_matrix_generator_iterator.hpp | 69 ++- .../linearized_halo_ndim_geometry.hpp | 226 -------- .../linearized_halo_ndim_iterator.hpp | 257 +++++++--- .../multigrid/linearized_halo_ndim_system.hpp | 482 +++++++++++++++++- .../multigrid/linearized_ndim_iterator.hpp | 96 +++- .../multigrid/linearized_ndim_system.hpp | 145 ++++-- .../graphblas/utils/multigrid/ndim_system.hpp | 20 +- .../graphblas/utils/multigrid/ndim_vector.hpp | 64 ++- tests/smoke/hpcg.cpp | 258 +++++----- tests/utils/matrix_generators.hpp | 38 +- 26 files changed, 1958 insertions(+), 1074 deletions(-) rename include/graphblas/algorithms/hpcg/{coloring.hpp => greedy_coloring.hpp} (92%) rename include/graphblas/algorithms/hpcg/{coarsener_builder.hpp => single_point_coarsener.hpp} (57%) rename include/graphblas/algorithms/multigrid/{coarsener.hpp => 
single_matrix_coarsener.hpp} (70%) delete mode 100644 include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp diff --git a/include/graphblas/algorithms/hpcg/coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp similarity index 92% rename from include/graphblas/algorithms/hpcg/coloring.hpp rename to include/graphblas/algorithms/hpcg/greedy_coloring.hpp index f5793b6ca..5b6f80b2c 100644 --- a/include/graphblas/algorithms/hpcg/coloring.hpp +++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp @@ -15,9 +15,14 @@ * limitations under the License. */ +/** + * @file greedy_coloring.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to partition the elements of a mesh via a simple, greedy coloring algorithm. + */ -#ifndef _H_GRB_ALGORITHMS_HPCG_COLORING -#define _H_GRB_ALGORITHMS_HPCG_COLORING +#ifndef _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING +#define _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING #include #include @@ -69,11 +74,8 @@ namespace grb { bool reorder_rows_per_color = false ) { - // This function can be used to completely transform any part of the data structures. 
- // Right now it does nothing, so compiling with a check for unused variables results in complaints - CoordType nrows = system.system_size(); - row_colors.insert( row_colors.begin(), nrows, nrows ); // value `nrow' means `uninitialized'; initialized colors go from 0 to nrow-1 + row_colors.insert( row_colors.begin(), nrows, nrows ); // value `nrows' means `uninitialized'; initialized colors go from 0 to nrow-1 CoordType totalColors = 1; row_colors[0] = 0; // first point gets color 0 @@ -81,6 +83,7 @@ namespace grb { typename grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >::Iterator begin = system.begin(); begin.next_element(); // skip first row + std::vector< bool > assigned( totalColors ); while( begin.has_more_elements() ) { CoordType curRow = begin->get_element_linear(); @@ -88,7 +91,7 @@ namespace grb { // if color already assigned to curRow continue; } - std::vector< bool > assigned( totalColors, false ); + assigned.assign( totalColors, false ); CoordType currentlyAssigned = 0; while( begin.has_more_neighbours() ) { @@ -108,6 +111,7 @@ namespace grb { if( currentlyAssigned < totalColors ) { // if there is at least one color left to use, look for it + // smallest possible for( CoordType j = 0; j < totalColors; ++j ) { if( !assigned[ j ] ) { // if no neighbor with this color, use it for this row @@ -163,4 +167,4 @@ namespace grb { } // namespace algorithms } // namespace grb -#endif // _H_GRB_ALGORITHMS_HPCG_COLORING +#endif // _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index 9d65aa79f..c4598323a 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -15,13 +15,25 @@ * limitations under the License. 
*/ +/** + * @dir include/graphblas/algorithms/hpcg + * This folder contains the code specific to the HPCG benchmark implementation: generation of the physical system, + * generation of the single point coarsener and coloring algorithm. + */ + +/** + * @file hpcg.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utility to build a full HPCG runner, bringing together all needed data structures. + */ + #ifndef _H_GRB_ALGORITHMS_HPCG_HPCG #define _H_GRB_ALGORITHMS_HPCG_HPCG #include #include -#include +#include #include #include @@ -36,14 +48,20 @@ namespace grb { typename InputType, class Ring, class Minus - > using HPCGRunnerType = mg_cg_runner< IOType, NonzeroType, InputType, ResidualType, - multigrid_runner< IOType, NonzeroType, InputType, - red_black_smoother_runner< IOType, NonzeroType, Ring >, - single_point_coarsener< IOType, NonzeroType, Ring, Minus >, + > using HPCGRunnerType = MultiGridCGRunner< IOType, NonzeroType, InputType, ResidualType, + MultiGridRunner< IOType, NonzeroType, + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring >, + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus >, Ring, Minus >, Ring, Minus >; + /** + * Builds a full HPCG runner object by "assembling" all needed information, + * with default type for smoother, coarsener and multi-grid runner.
+ * + * @param[in] smoother_steps how many times the smoother should run (both pre- and post-smoothing) + */ template< typename IOType, typename ResidualType, @@ -54,13 +72,13 @@ namespace grb { > HPCGRunnerType< IOType, ResidualType, NonzeroType, InputType, Ring, Minus > build_hpcg_runner( size_t smoother_steps ) { - single_point_coarsener< IOType, NonzeroType, Ring, Minus > coarsener; - red_black_smoother_runner< IOType, NonzeroType, Ring > - smoother{ smoother_steps, smoother_steps, 1UL, {}, Ring() }; + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus > coarsener; + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring > + smoother( { smoother_steps, smoother_steps, 1UL, {}, Ring() } ); - multigrid_runner< IOType, NonzeroType, InputType, - red_black_smoother_runner< IOType, NonzeroType, Ring >, - single_point_coarsener< IOType, NonzeroType, Ring, Minus >, + MultiGridRunner< IOType, NonzeroType, + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring >, + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus >, Ring, Minus > mg_runner( std::move( smoother ), std::move( coarsener ) ); diff --git a/include/graphblas/algorithms/hpcg/coarsener_builder.hpp b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp similarity index 57% rename from include/graphblas/algorithms/hpcg/coarsener_builder.hpp rename to include/graphblas/algorithms/hpcg/single_point_coarsener.hpp index 2ee848039..a3826c9c0 100644 --- a/include/graphblas/algorithms/hpcg/coarsener_builder.hpp +++ b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,14 @@ * limitations under the License. 
*/ -#ifndef _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER -#define _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER +/** + * @file single_point_coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to build the coarsening matrix for an HPCG simulation. + */ + +#ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER +#define _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER #include #include @@ -30,40 +36,44 @@ namespace grb { namespace algorithms { + // forward declaration template< size_t DIMS, typename CoordType, typename ValueType - > - class HPCGCoarsenerBuilder; + > class SinglePointCoarsenerBuilder; /** - * @brief Class to generate the coarsening matrix of an underlying \p DIMS -dimensional system. + * Iterator class to generate the coarsening matrix for an HPCG simulation. * - * This class coarsens a finer system to a coarser system by projecting each input value (column), - * espressed in finer coordinates, to an output (row) value espressed in coarser coordinates. - * The coarser sizes are assumed to be row_generator#physical_sizes, while the finer sizes are here - * stored inside #finer_sizes. + * The coarsening matrix samples a single value from the finer space for every element + * of the coarser space; this value is the first one (i.e. the one with smallest coordinates) + * in the finer sub-space corresponding to each coarser element. * - * The corresponding refinement matrix is obtained by transposing the coarsening matrix. + * This coarsening method is simple but can lead to unstable results, especially with certain combinations + * of smoothers and partitioning methods. * - * @tparam DIMS number of dimensions of the system - * @tparam T type of matrix values + * This iterator is random-access. 
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample + * the finer value) */ template< size_t DIMS, typename CoordType, typename ValueType - > struct HPCGCoarsenerGeneratorIterator { + > struct SinglePointCoarsenerIterator { - friend HPCGCoarsenerBuilder< DIMS, CoordType, ValueType >; + friend SinglePointCoarsenerBuilder< DIMS, CoordType, ValueType >; using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; using LinearSystemIterType = typename LinearSystemType::Iterator; - using SelfType = HPCGCoarsenerGeneratorIterator< DIMS, CoordType, ValueType >; + using SelfType = SinglePointCoarsenerIterator< DIMS, CoordType, ValueType >; using ArrayType = std::array< CoordType, DIMS >; struct _HPCGValueGenerator { @@ -100,19 +110,16 @@ namespace grb { using reference = const value_type&; using difference_type = typename LinearSystemIterType::difference_type; - HPCGCoarsenerGeneratorIterator( const SelfType &o ) = default; + SinglePointCoarsenerIterator( const SelfType &o ) = default; - HPCGCoarsenerGeneratorIterator( SelfType &&o ) = default; + SinglePointCoarsenerIterator( SelfType &&o ) = default; SelfType & operator=( const SelfType & ) = default; SelfType & operator=( SelfType && ) = default; /** - * @brief Increments the row and the column according to the respective physical sizes, - * thus iterating onto the coarsening matrix coordinates. - * - * @return \code *this \endcode, i.e. the same object with the updates row and column + * Advances \c this by 1 in constant time. */ SelfType & operator++() noexcept { (void) ++_sys_iter; @@ -120,36 +127,36 @@ namespace grb { return *this; } + /** + * Advances \c this by \p offset in constant time. 
+ */ SelfType & operator+=( size_t offset ) { _sys_iter += offset; update_coords(); return *this; } + /** + * Computes the difference between \c this and \p o as integer. + */ difference_type operator-( const SelfType &o ) const { return this->_sys_iter - o._sys_iter; } /** - * @brief Returns whether \c this and \p o differ. + * Returns whether \c this and \p o differ. */ bool operator!=( const SelfType &o ) const { return this->_sys_iter != o._sys_iter; } /** - * @brief Returns whether \c this and \p o are equal. + * Returns whether \c this and \p o are equal. */ bool operator==( const SelfType &o ) const { return ! this->operator!=( o ); } - /** - * @brief Operator returning the triple to directly access row, column and element values. - * - * Useful when building the matrix by copying the triple of coordinates and value, - * like for the BSP1D backend. - */ reference operator*() const { return _val; } @@ -159,53 +166,48 @@ namespace grb { } /** - * @brief Returns the current row, according to the coarser system. + * Returns the current row, within the coarser system. */ inline RowIndexType i() const { return _val.i(); } /** - * @brief Returns the current column, according to the finer system. + * Returns the current column, within the finer system. */ inline ColumnIndexType j() const { return _val.j(); } /** - * @brief Returns always 1, as the coarsening keeps the same value. + * Returns always 1, as the coarsening keeps the same value. */ inline ValueType v() const { return _val.v(); } private: - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes const LinearSystemType *_lin_sys; - const ArrayType *_steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be + const ArrayType *_steps; LinearSystemIterType _sys_iter; value_type _val; /** - * @brief Construct a new \c HPCGCoarsenerGeneratorIterator object from the coarser and finer sizes, - * setting its row at \p _current_row and the column at the corresponding value. + * Construct a new SinglePointCoarsenerIterator object starting from the LinearizedNDimSystem + * object \p system describing the \b coarser system and the \b ratios \p steps between each finer and + * the corresponding coarser dimension. * - * Each finer size must be an exact multiple of the corresponding coarser size, otherwise the - * construction will throw an exception. - * - * @param _coarser_sizes sizes of the coarser system (rows) - * @param _finer_sizes sizes of the finer system (columns) - * @param _current_row row (in the coarser system) to set the iterator on + * @param system LinearizedNDimSystem object describing the coarser system + * @param steps ratios per dimension between finer and coarser system */ - HPCGCoarsenerGeneratorIterator( + SinglePointCoarsenerIterator( const LinearSystemType &system, const ArrayType &steps ) noexcept : _lin_sys( &system ), _steps( &steps ), _sys_iter( _lin_sys->begin() ), - _val(0, 0) + _val( 0, 0 ) { update_coords(); } @@ -216,13 +218,13 @@ namespace grb { } /** - * @brief Returns the row coordinates converted to the finer system, to compute + * Returns the row coordinates converted to the finer system, to compute * the column value. */ ColumnIndexType coarse_rows_to_finer_col() const noexcept { - ColumnIndexType finer { 0 }; - ColumnIndexType s { 1 }; - for( size_t i { 0 }; i < DIMS; i++ ) { + ColumnIndexType finer = 0; + ColumnIndexType s = 1; + for( size_t i = 0; i < DIMS; i++ ) { s *= (*_steps)[ i ]; finer += s * _sys_iter->get_position()[ i ]; s *= _lin_sys->get_sizes()[ i ]; @@ -231,21 +233,36 @@ namespace grb { } }; + /** + * Builder object to create iterators that generate a coarsening matrix.
+ * + * It is a facility to generate beginning and end iterators and abstract the logic away from users. + * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample + * the finer value) + */ template< size_t DIMS, typename CoordType, typename ValueType - > class HPCGCoarsenerBuilder { + > class SinglePointCoarsenerBuilder { public: using ArrayType = std::array< CoordType, DIMS >; - using Iterator = HPCGCoarsenerGeneratorIterator< DIMS, CoordType, ValueType >; - using SelfType = HPCGCoarsenerBuilder< DIMS, CoordType, ValueType >; + using Iterator = SinglePointCoarsenerIterator< DIMS, CoordType, ValueType >; + using SelfType = SinglePointCoarsenerBuilder< DIMS, CoordType, ValueType >; - HPCGCoarsenerBuilder( + /** + * Construct a new SinglePointCoarsenerBuilder object from the sizes of finer system + * and those of the coarser system; finer sizes must be an exact multiple of coarser sizes, + * otherwise an exception is raised. + */ + SinglePointCoarsenerBuilder( const ArrayType &_finer_sizes, const ArrayType &_coarser_sizes ) : system( _coarser_sizes.begin(), _coarser_sizes.end() ) { - for( size_t i { 0 }; i < DIMS; i++ ) { + for( size_t i = 0; i < DIMS; i++ ) { // finer size MUST be an exact multiple of coarser_size std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); if( ratio.quot < 2 || ratio.rem != 0 ) { @@ -258,26 +275,34 @@ namespace grb { } } - HPCGCoarsenerBuilder( const SelfType & ) = delete; + SinglePointCoarsenerBuilder( const SelfType & ) = delete; - HPCGCoarsenerBuilder( SelfType && ) = delete; + SinglePointCoarsenerBuilder( SelfType && ) = delete; SelfType & operator=( const SelfType & ) = delete; SelfType & operator=( SelfType && ) = delete; + /** + * Returns the size of the coarser system, i.e. its number of elements.
+ */ size_t system_size() const { return system.system_size(); } + /** + * Produces a beginning iterator to generate the coarsening matrix. + */ Iterator make_begin_iterator() { return Iterator( system, steps ); } + /** + * Produces an end iterator to stop the generation of the coarsening matrix. + */ Iterator make_end_iterator() { Iterator result( system, steps ); - result += system_size() - 1; // do not trigger boundary checks - ++result; + result += system_size(); // do not trigger boundary checks return result; } @@ -292,5 +317,5 @@ namespace grb { } // namespace algorithms } // namespace grb -#endif // _H_GRB_ALGORITHMS_HPCG_COARSENER_BUILDER +#endif // _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp b/include/graphblas/algorithms/hpcg/system_builder.hpp index 700718e3b..48a2e640d 100644 --- a/include/graphblas/algorithms/hpcg/system_builder.hpp +++ b/include/graphblas/algorithms/hpcg/system_builder.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,17 +18,7 @@ /** * @file system_builders.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build matrices for an HPCG simulation in a generic number of dimensions - * - * In particular, the main matrices are: - * - a system matrix, generated from an N-dimenional space of coordinates by iterating along - * each dimension in priority order, where the first dimension has highest priority and the last - * dimension least priority; for each point (row), all its N-dimensional neighbours within - * a given distance are generated for the column - * - a coarsening matrix, generated by iterating on a coarser system of N dimensions (row) and projecting - * each point to a corresponding system of finer sizes - * - * @date 2021-04-30 + * Utilities to build the system matrix for an HPCG simulation in a generic number of dimensions. */ #ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER @@ -52,6 +42,14 @@ namespace grb { namespace algorithms { + /** + * Builder class to build the iterators that generate an HPCG system matrix, describing a + * \p DIMS -dimensional simulation mesh for Fourier-like heat propagation. + * + * @tparam DIMS dimensions of the mesh + * @tparam CoordType type storing the coordinates and sizes of the matrix + * @tparam ValueType nonzero type + */ template< size_t DIMS, typename CoordType, @@ -81,6 +79,14 @@ namespace grb { using Iterator = grb::utils::multigrid::HaloMatrixGeneratorIterator< DIMS, CoordType, ValueType, HPCGDiagGenerator >; + /** + * Construct a new HPCGSystemBuilder object from the data of the physical system. 
+ * + * @param sizes sizes along each dimension + * @param halo halo size + * @param diag value along the diagonal, for self-interactions + * @param non_diag value outside the diagonal, for element-element interaction + */ HPCGSystemBuilder( const std::array< CoordType, DIMS > &sizes, CoordType halo, @@ -108,26 +114,41 @@ namespace grb { HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; + /** + * Number of elements of the mesh. + */ size_t system_size() const { return _system.base_system_size(); } + /** + * Total number of neighbors for all elements of the mesh. + */ size_t num_neighbors() const { return _system.halo_system_size(); } + /** + * Get the generator object, i.e. the HaloSystemType object that describes the geometry + * of the simulation mesh. + */ const HaloSystemType & get_generator() const { return _system; } + /** + * Builds the beginning iterator to generate the system matrix. + */ Iterator make_begin_iterator() const { return Iterator( _system, _diag_generator ); } + /** + * Builds the end iterator to generate the system matrix. + */ Iterator make_end_iterator() const { Iterator result( _system, _diag_generator ); - result += num_neighbors() - 1; // do not trigger boundary checks - ++result; + result += num_neighbors(); return result; } @@ -139,7 +160,6 @@ namespace grb { return _diag_generator._non_diag; } - private: HaloSystemType _system; HPCGDiagGenerator _diag_generator; diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 088bb9fb3..c0b522521 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,9 @@ */ /** - * @file hpcg_system_building_utils.hpp + * @file system_building_utils.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Utilities to build an antire system for HPCG simulations in an arbitrary number of dimensions. - * @date 2021-04-30 + * Utilities to build an antire system for HPCG simulations in an arbitrary number of dimensions. */ #ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS @@ -29,23 +28,24 @@ #include #include #include -#include #include #include #include +#include +#include #include #include #include "system_builder.hpp" -#include "coarsener_builder.hpp" -#include "coloring.hpp" +#include "single_point_coarsener.hpp" +#include "greedy_coloring.hpp" namespace grb { namespace algorithms { /** - * @brief Container of the parameter for HPCG simulation generation: physical system characteristics and + * Container of the parameter for HPCG simulation generation: physical system characteristics and * coarsening information. * * @tparam DIMS dimensions of the physical system @@ -54,7 +54,7 @@ namespace grb { template< size_t DIMS, typename NonzeroType - > struct hpcg_system_params { + > struct HPCGSystemParams { std::array< size_t, DIMS > physical_sys_sizes; size_t halo_size; NonzeroType diag_value; @@ -64,29 +64,43 @@ namespace grb { size_t coarsening_step; }; + /** + * Builds all required system generators for an entire multi-grid simulation; each generator + * corresponds to a level of the HPCG system multi-grid, with increasingly coarser sizes, and can + * generate the system matrix of that level. All required pieces of information required to build + * the levels is stored in \p params. 
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam NonzeroType type of the nonzero + * @param[in] params structure with the parameters to build an entire HPCG simulation + * @param[out] mg_generators std::vector of HPCGSystemBuilder, one per layer of the multi-grid + */ template< size_t DIMS, typename CoordType, typename NonzeroType > void hpcg_build_multigrid_generators( - const hpcg_system_params< DIMS, NonzeroType > &params, + const HPCGSystemParams< DIMS, NonzeroType > &params, std::vector< grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > > &mg_generators ) { static_assert( DIMS > 0, "DIMS must be > 0" ); - size_t const current_size{ std::accumulate( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), 1UL, - std::multiplies< size_t >() ) }; + size_t const current_size = std::accumulate( params.physical_sys_sizes.cbegin(), + params.physical_sys_sizes.cend(), 1UL, std::multiplies< size_t >() ); if( current_size > std::numeric_limits< CoordType >::max() ) { - throw std::domain_error( "CoordT cannot store the matrix coordinates" ); + throw std::domain_error( "CoordType cannot store the matrix coordinates" ); } - size_t min_physical_size { *std::min_element( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend() ) }; + size_t min_physical_size = *std::min_element( params.physical_sys_sizes.cbegin(), + params.physical_sys_sizes.cend() ); if( min_physical_size < params.min_phys_size ) { throw std::domain_error( "the initial system is too small" ); } std::array< CoordType, DIMS > coord_sizes; // type-translate coordinates - std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), coord_sizes.begin() ); + std::copy( params.physical_sys_sizes.cbegin(), params.physical_sys_sizes.cend(), + coord_sizes.begin() ); // generate hierarchical coarseners for( size_t coarsening_level = 0UL; @@ -94,36 +108,44 @@ namespace grb { coarsening_level++ ) {
// build generator - mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, params.non_diag_value ); + mg_generators.emplace_back( coord_sizes, params.halo_size, + params.diag_value, params.non_diag_value ); // prepare for new iteration min_physical_size /= params.coarsening_step; std::for_each( coord_sizes.begin(), coord_sizes.end(), - [ &params ]( CoordType &v ){ v /= params.coarsening_step; }); - } - } - - template< typename CoordType > void hpcg_split_rows_by_color( - const std::vector< CoordType > & row_colors, - size_t num_colors, - std::vector< std::vector< CoordType > > & per_color_rows - ) { - per_color_rows.resize( num_colors ); - for( CoordType i = 0; i < row_colors.size(); i++ ) { - per_color_rows[ row_colors[ i ] ].push_back( i ); + [ &params ]( CoordType &v ) { + std::ldiv_t ratio = std::ldiv( v, params.coarsening_step ); + if( ratio.rem != 0 ) { + throw std::invalid_argument( + std::string( "system size " ) + std::to_string( v ) + + std::string( " is not divisible by " ) + + std::to_string( params.coarsening_step ) + ); + } + v = ratio.quot; + }); } } + /** + * Populates the system matrix \p M out of the builder \p system_generator. + * + * The matrix \p M must have been previously allocated and initialized with the proper sizes, + * as this procedure only populates it with the nozeroes generated by \p system_generator. + * + * This function takes care of the parallelism by employing random-access iterators and by + * \b parallelizing the generation across multiple processes in case of distributed execution.
+ */ template < size_t DIMS, typename CoordType, - typename NonzeroType, - enum grb::Backend B + typename NonzeroType > grb::RC hpcg_populate_system_matrix( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - grb::Matrix< NonzeroType, B > &M + grb::Matrix< NonzeroType > &M ) { - const size_t pid { spmd<>::pid() }; + const size_t pid = spmd<>::pid(); if( pid == 0) { std::cout << "- generating system matrix..."; @@ -133,10 +155,20 @@ namespace grb { typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator end( system_generator.make_end_iterator() ); - grb::utils::partition_iteration_range_on_procs( system_generator.num_neighbors(), begin, end ); + grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), + system_generator.num_neighbors(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } + /** + * Populates the coarsening data \p coarsener (in particular the coarsening matrix) from the + * builder of the finer system \p finer_system_generator and that of the coarser system + * \p coarser_system_generator. + * + * This function takes care of parallelizing the generation by using a random-access iterator + * to generate the coarsening matrix and by distributing the generation across nodes + * of a distributed system (if any). 
+ */ template< size_t DIMS, typename CoordType, @@ -145,7 +177,7 @@ namespace grb { > grb::RC hpcg_populate_coarsener( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, - coarsening_data< IOType, NonzeroType > &coarsener + CoarseningData< IOType, NonzeroType > &coarsener ) { static_assert( DIMS > 0, "DIMS must be > 0" ); @@ -158,8 +190,8 @@ namespace grb { throw std::invalid_argument( "wrong sizes"); } - size_t const rows { coarser_size }; - size_t const cols { finer_size }; + size_t const rows = coarser_size; + size_t const cols = finer_size; assert( finer_sizes.size() == coarser_sizes.size() ); @@ -169,18 +201,48 @@ namespace grb { " with rows == and cols == " ); } - grb::algorithms::HPCGCoarsenerBuilder< DIMS, CoordType, NonzeroType > coarsener_builder( finer_sizes, coarser_sizes ); - grb::algorithms::HPCGCoarsenerGeneratorIterator< DIMS, CoordType, NonzeroType > begin( coarsener_builder.make_begin_iterator() ); - grb::algorithms::HPCGCoarsenerGeneratorIterator< DIMS, CoordType, NonzeroType > end( coarsener_builder.make_end_iterator() ); - grb::utils::partition_iteration_range_on_procs( coarsener_builder.system_size(), begin, end ); + using gen_t = typename grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType >; + gen_t coarsener_builder( finer_sizes, coarser_sizes ); + typename gen_t::Iterator begin( coarsener_builder.make_begin_iterator() ), + end( coarsener_builder.make_end_iterator() ); + grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), + coarsener_builder.system_size(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } namespace internal { + /** + * Store row values based on their color into separate vectors. 
+ * + * @param[in] row_colors for each row (corresponding to a vector position) its color + * @param[in] num_colors number of colors, i.e. max across all values in \p row_colors + 1 + * @param[out] per_color_rows for each position \a i it stores an std::vector with all rows + * of color \a i inside \p row_colors + */ + template< typename CoordType > void hpcg_split_rows_by_color( + const std::vector< CoordType > & row_colors, + size_t num_colors, + std::vector< std::vector< CoordType > > & per_color_rows + ) { + per_color_rows.resize( num_colors ); + for( CoordType i = 0; i < row_colors.size(); i++ ) { + per_color_rows[ row_colors[ i ] ].push_back( i ); + } + } + + /** + * Utility class implementing a random-access iterator that always returns a + * \c true value. + * + * It is used in the following to build mask vectors via buildVectorUnique(), where + * all the non-zero positions are \c true. + * + * @tparam CoordType type of the internal coordinate + */ template< typename CoordType > struct true_iter { - static const bool __TRUE = true; + // static const bool __TRUE; using self_t = true_iter< CoordType >; using iterator_category = std::random_access_iterator_tag; @@ -225,12 +287,11 @@ namespace grb { private: CoordType index; + const bool __TRUE = true; // for its address to be passed outside }; - template< typename CoordType > const bool true_iter< CoordType >::__TRUE; - /** - * @brief Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . + * Populates \p masks with static color mask generated for a squared matrix of size \p matrix_size . * * Colors are built in the range [0, \p colors ), with the mask for color 0 being the array * of values true in the positions \f$ [0, colors, 2*colors, ..., floor((system_size - 1)/colors) * color] \f$, @@ -241,17 +302,15 @@ namespace grb { * only with the \c true values, leading to sparse vectors. 
This saves on storage space and allows * GraphBLAS routines (like \c eWiseLambda() ) to iterate only on true values. * - * @tparam B GraphBLAS backend for the vector * @param masks output vector of color masks * @param matrix_size size of the system matrix * @param colors numbers of colors masks to build; it must be < \p matrix_size * @return grb::RC the success value returned when trying to build the vector */ - template< enum grb::Backend B > grb::RC hpcg_build_static_color_masks( size_t matrix_size, const std::vector< std::vector< size_t > > &per_color_rows, - std::vector< grb::Vector< bool, B > > & masks + std::vector< grb::Vector< bool> > &masks ) { if( ! masks.empty() ) { throw std::invalid_argument( "vector of masks is expected to be empty" ); @@ -273,7 +332,7 @@ namespace grb { std::vector< size_t >::const_iterator end = rows.cend(); // partition_iteration_range( rows.size(), begin, end ); grb::RC rc = grb::buildVectorUnique( output_mask, begin , end, true_iter< size_t >( 0 ), - true_iter< size_t >( std::distance( begin, end ) ), IOMode::SEQUENTIAL ); + true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "error while creating output mask for color " << i << ": " << toString( rc ) << std::endl; @@ -297,15 +356,32 @@ namespace grb { } // namespace internal + /** + * Populates the smoothing information \p smoothing_info for a Red-Black Gauss-Seidel smoother + * to be used for an HPCG simulation. The information about the mesh to smooth are passed + * via \p system_generator. + * + * Steps for the smoother generation: + * + * 1. the mesh elements (the system matrix rows) are colored via a greedy algorithm, so that + * no two neighboring elements have the same color; this phase colors the \b entire system + * and cannot be parallelized, even in a distributed system, since the current coloring algorithm + * is \b not distributed + * 2. rows are split according to their color + * 3. 
for each color \a c the color mask with the corresponding rows is generated: + * a dedicated sparse grb::Vector signals the rows of color \a c (by marking them as \c true + * ); such a vector allows updating all rows of color \a c in \b parallel when used as a mask + * to an mxv() operation (as done during smoothing) + */ template< size_t DIMS, typename CoordType, typename NonzeroType > grb::RC hpcg_populate_smoothing_data( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - smoother_data< NonzeroType > &smoothing_info + SmootherData< NonzeroType > &smoothing_info ) { - const size_t pid { spmd<>::pid() }; + const size_t pid = spmd<>::pid(); grb::RC rc = set( smoothing_info.A_diagonal, system_generator.get_diag_value() ); if( rc != grb::SUCCESS ) { @@ -321,7 +397,9 @@ namespace grb { std::vector< CoordType > colors, color_counters; hpcg_greedy_color_ndim_system( system_generator.get_generator(), colors, color_counters ); std::vector< std::vector< CoordType > > per_color_rows; - hpcg_split_rows_by_color( colors, color_counters.size(), per_color_rows ); + internal::hpcg_split_rows_by_color( colors, color_counters.size(), per_color_rows ); + colors.clear(); + colors.shrink_to_fit(); if( rc != grb::SUCCESS ) { if( pid == 0 ) { std::cout << "error: " << __LINE__ << std::endl; diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp index 34347582e..75d23a7cc 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @file multigrid_building_utils.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to allocate data for an entire multi-grid simulation. 
+ */ + #include #include #include @@ -25,6 +31,38 @@ namespace grb { namespace algorithms { + /** + * Allocates all the levels for an entire multi-grid simulation for the multi-grid v-cycle, + * the coarsener and the smoother. This routine just allocates and initializes the data structures, + * but does \b not populate them, which depends on the specific algorithms. + * + * Thanks to the templating, this routine is meant to be independent from the specific algorithm + * choosen for the simulation, but simply implements the logic to move from one level (finer) + * to the next one (coarser). To be used with any data structure, the constructor of each + * structure must meet a certain interface, as explained in the following. + * + * Note: structures are allocated on the heap and manged via an std::unique_ptr for efficiency + * and convenience: since they may store large data amounts, moving them via their move (copy) + * constructor (as required for the growth of an std::vector) may be costly, and forces the user + * to implement the move constructor for each type (which may be annoying). + * Furthermore, avoiding movement (copy) entirely protects against possible bugs + * in move (copy)-constructor logic (not uncommon in prototypes). 
+ * + * @tparam MGInfoType type holding the information to run the chosen multi-grid algorithm: + * its constructor must take in input the coarsening level (0 to \p mg_sizes.size() ) + * and the size of the system matrix for that level + * @tparam CoarsenerInfoType type holding the information for the coarsener; + * its constructor must take in input the size of the finer system matrix and that of + * the coarser system matrix (in this order) + * @tparam SmootherInfoType type holding the information for the smoother; + * its constructor must take in input the size of the system matrix for that level + * + * @param mg_sizes sizes of the system matrix for each level of the multi-grid + * @param system_levels system data (system matrix, residual, solution, ...) for each level + * @param coarsener_levels at position \a i of this vector, data to coarsen from level \a i + * (system size \p mg_sizes [i] ) to level \a i+1 (system size \p mg_sizes [i+1] ) + * @param smoother_levels smoother data for each level + */ template< typename MGInfoType, typename CoarsenerInfoType, @@ -43,6 +81,9 @@ namespace grb { smoother_levels.emplace_back( new SmootherInfoType( finer_size ) ); // create smoother for main for( size_t i = 1; i < mg_sizes.size(); i++ ) { size_t coarser_size = mg_sizes[ i ]; + if( coarser_size >= finer_size ) { + throw std::invalid_argument( "system sizes not monotonically decreasing" ); + } coarsener_levels.emplace_back( new CoarsenerInfoType( finer_size, coarser_size ) ); system_levels.emplace_back( new MGInfoType( i, coarser_size ) ); smoother_levels.emplace_back( new SmootherInfoType( coarser_size ) ); diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 2ac3c0770..735f87d81 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. 
+ * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,11 +16,17 @@ */ /** - * @file hpcg.hpp + * @dir include/graphblas/algorithms/mutligrid + * This folder contains the implementation of the algorithms for a basic multi-grid V-cycle solver: + * Conjugate Gradient with multi-grid, a basic V-cycle multi-grid implementation, a single-matrix coarsener/ + * prolonger, an implementation of a Red-Black Gauss-Seidel smoother. These algorithms can be composed + * via their specific runners, as in the example HPCG benchmark. + */ + +/** + * @file multigrid_cg.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief File with the main routine to run a full HPCG simulation, comprising multi-grid runs - * with Red-Black Gauss-Seidel smoothing. - * @date 2021-04-30 + * Algorithm and runner for a Conjugate Gradient solver augmented with a multi-grid solver. */ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_CG @@ -30,26 +36,20 @@ #include #include +#include #include "multigrid_data.hpp" -#include - namespace grb { namespace algorithms { /** - * @brief Data stucture to store the data for a full HPCG run: system vectors and matrix, - * coarsening information and temporary vectors. + * Data stucture to store the vectors specific to the Conjugate Gradient algorithm, + * including inputs, outputs and temporary vectors. * - * This data structures contains all the needed vectors and matrices to solve a linear system - * \f$ A x = b \f$. As for \ref system_data, internal elements are built and their sizes properly initialized - * to #system_size, but internal values are \b not initialized, as they are left to user's logic. 
- * Similarly, the coarsening information in #coarser_level is to be initialized by users by properly - * building a \code multigrid_data \endcode object and storing its pointer into - * #coarser_level; on destruction, #coarser_level will also be properly destroyed without - * user's intervention. + * Input and output vectors use the same naming scheme as for the corresponding mathematics, + * where the equation to solve is conventionally written as \f$ A x = b \f$. * * @tparam IOType type of values of the vectors for intermediate results * @tparam NonzeroType type of the values stored inside the system matrix #A @@ -59,107 +59,83 @@ namespace grb { typename IOType, typename NonzeroType, typename InputType - > struct mg_cg_data { + > struct MultiGridCGData { - grb::Vector< InputType > b; ///< right-side vector of known values + grb::Vector< InputType > b; ///< Right-side vector of known values. grb::Vector< IOType > u; ///< temporary vectors (typically for CG exploration directions) grb::Vector< IOType > p; ///< temporary vector (typically for x refinements coming from the multi-grid run) - grb::Vector< IOType > x; // system solution being refined over the iterations: it us up to the user - ///< to set the initial solution value + grb::Vector< IOType > x; ///< system solution being refined over the iterations: it us up to the user + ///< to set the initial solution value to something meaningful /** - * @brief Construct a new \c hpcg_data object by building vectors and matrices and by setting - * #coarser_level to \c nullptr (i.e. no coarser level is assumed). - * - * @param[in] sys_size the size of the simulated system, i.e. of all the internal vectors and matrices + * Construct a new \c MultiGridCGData object by building its vectors with size \p sys_size. 
*/ - mg_cg_data( size_t sys_size ) : + MultiGridCGData( size_t sys_size ) : b( sys_size ), u( sys_size ), p( sys_size ), x( sys_size ) {} - grb::RC zero_temp_vectors() { - grb::RC rc = grb::set( u, 0 ); - rc = rc ? rc : grb::set( p, 0 ); + grb::RC init_vectors( IOType zero ) { + grb::RC rc = grb::set( u, zero ); + rc = rc ? rc : grb::set( p, zero ); return rc; } }; + /** + * Container for various options and algebraic abstractions to be passed to a CG simulation with multi-grid. + */ template < typename IOType, typename ResidualType, - class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, - class Minus = operators::subtract< IOType > - > - struct cg_options { - bool with_preconditioning; - size_t max_iterations; - ResidualType tolerance; - bool print_iter_stats; - Ring ring; - Minus minus; + class Ring, + class Minus + > struct CGOptions { + bool with_preconditioning; ///< whether preconditioning is enabled + size_t max_iterations; ///< max number of allowed iterations for CG: after that, the solver is halted + ///< and the result achieved so far returned + ResidualType tolerance; ///< ratio between initial residual and current residual that halts the solver + ///< if reached, for the solution is to be considered "good enough" + bool print_iter_stats; ///< whether to print information on the multi-grid and the residual on each iteration + Ring ring; ///< algebraic ring to be used + Minus minus; ///< minus operator to be used }; - - template < typename ResidualType > struct cg_out_data { - size_t iterations; - ResidualType norm_residual; + /** + * Structure for the output information of a CG run. + */ + template < typename ResidualType > struct CGOutInfo { + size_t iterations; ///< number of iterations performed + ResidualType norm_residual; ///< norm of the final residual }; /** - * @brief High-Performance Conjugate Gradient algorithm implementation running entirely on GraphBLAS. 
+ * Conjugate Gradient algorithm implementation augmented by a Multi-Grid solver, + * inspired to the High Performance Conjugate Gradient benchmark. * - * Finds the solution x of an \f$ A x = b \f$ algebraic system by running the HPCG algorithm. - * The implementation here closely follows the reference HPCG benchmark used for the HPCG500 rank, - * visible at https://github.com/hpcg-benchmark/hpcg. - * The only difference is the usage of a Red-Black Gauss-Seidel smoother instead of the standard one - * for performance reasons, as the standard Gauss-Seidel algorithm is inherently sequential and not - * expressible in terms of standard linear algebra operations. - * In particular, this implementation (as the standard one) couples a standard CG algorithm with a V-cycle - * multi-grid solver to initially refine the tentative solution. This refinement step depends on the - * availability of coarsening information, which should be stored inside \p data; otherwise, - * the refinement is not performed and only the CG algorithm is run. For more information on inputs - * and on coarsening information, you may consult the \ref hpcg_data class documentation. + * This CG solver calls the MG solver at the beginning of each iteration to improve + * the initial solution via the residual (thanks to the smoother) and then proceeds with + * the standard CG iteration. 
* - * This implementation assumes that the vectors and matrices inside \p data are all correctly initialized - * and populated with the proper values; in particular - * - hpcg_data#x with the initial tentative solution (iterative solutions are also stored here) - * - hpcg_data#A with the system matrix - * - hpcg_data#b with the right-hand side vector \f$ b \f$ - * - hpcg_data#A_diagonal with the diagonal values of the matrix - * - hpcg_data#color_masks with the color masks for this level - * - hpcg_data#coarser_level with the information for the coarser multi-grid run (if any) - * The other vectors are assumed to be inizialized (via the usual grb::Vector#Vector(size_t) constructor) - * but not necessarily populated with values, as they are internally populated when needed; hence, - * any previous values are overwritten. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning + * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning * the failure code. * * @tparam IOType type of result and intermediate vectors used during computation * @tparam ResidualType type of the residual norm * @tparam NonzeroType type of matrix values * @tparam InputType type of values of the right-hand side vector b - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions + * @tparam MultiGridrunnerType type for the multi-grid runner object + * @tparam Ring algebraic ring type + * @tparam Minus minus operator * - * @param[in,out] data \ref hpcg_data object storing inputs, outputs and temporary vectors used for the computation, - * as long as the information for the recursive multi-grid runs - * @param[in] with_preconditioning whether to use pre-conditioning, i.e. 
to perform multi-grid runs - * @param[in] presmoother_steps number of pre-smoother steps, for multi-grid runs - * @param[in] postsmoother_steps nomber of post-smoother steps, for multi-grid runs - * @param[in] max_iterations maximum number if iterations the simulation may run for; once reached, - * the simulation stops even if the residual norm is above \p tolerance - * @param[in] tolerance the tolerance over the residual norm, i.e. the value of the residual norm to stop - * the simulation at - * @param[out] iterations numbers of iterations performed - * @param[out] norm_residual norm of the final residual - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise + * @param cg_data data for the CG solver only + * @param cg_opts options for the CG solver + * @param grid_base base (i.e., finer) level of the multi-grid, with the information of the physical system + * @param MultiGridRunner runner object (functor) to call the multi-grid solver + * @param out_info solver output information + * @return grb::RC SUCCESS in case of succesful run */ template< typename IOType, @@ -169,27 +145,28 @@ namespace grb { typename MultiGridrunnerType, class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, class Minus = operators::subtract< IOType > - > grb::RC mg_cg( - multigrid_data< IOType, NonzeroType > &grid_base, - mg_cg_data< IOType, NonzeroType, InputType > &data, - const cg_options< IOType, ResidualType > &cg_opts, + > grb::RC multigrid_conjugate_gradient( + MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, + const CGOptions< IOType, ResidualType, Ring, Minus > &cg_opts, + MultiGridData< IOType, NonzeroType > &grid_base, MultiGridrunnerType &multigrid_runner, - cg_out_data< ResidualType > 
&out_data + CGOutInfo< ResidualType > &out_info ) { ResidualType alpha; - const grb::Matrix< NonzeroType > &A { grid_base.A }; - grb::Vector< IOType > &r { grid_base.r }; // residual vector - grb::Vector< IOType > &z { grid_base.z }; // pre-conditioned residual vector - grb::Vector< IOType > &x { data.x }; - const grb::Vector< InputType > &b { data.b }; - grb::Vector< IOType > &p { data.p }; // direction vector - grb::Vector< IOType > &Ap { data.u }; // temp vector - grb::RC ret { SUCCESS }; + const grb::Matrix< NonzeroType > &A = grid_base.A; + grb::Vector< IOType > &r = grid_base.r; // residual vector + grb::Vector< IOType > &z = grid_base.z; // pre-conditioned residual vector + grb::Vector< IOType > &x = cg_data.x; + const grb::Vector< InputType > &b = cg_data.b; + grb::Vector< IOType > &p = cg_data.p; // direction vector + grb::Vector< IOType > &Ap = cg_data.u; // temp vector + grb::RC ret = SUCCESS; - ret = ret ? ret : grb::set( Ap, 0 ); - ret = ret ? ret : grb::set( r, 0 ); - ret = ret ? ret : grb::set( p, 0 ); + const IOType io_zero = cg_opts.ring.template getZero< IOType >(); + ret = ret ? ret : grb::set( Ap, io_zero ); + ret = ret ? ret : grb::set( r, io_zero ); + ret = ret ? ret : grb::set( p, io_zero ); ret = ret ? ret : grb::set( p, x ); ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, cg_opts.ring ); // Ap = A * x @@ -198,7 +175,8 @@ namespace grb { ret = ret ? ret : grb::eWiseApply( r, b, Ap, cg_opts.minus ); // r = b - Ap; assert( ret == SUCCESS ); - ResidualType norm_residual = cg_opts.ring.template getZero< ResidualType >(); + const ResidualType residual_zero = cg_opts.ring.template getZero< ResidualType >(); + ResidualType norm_residual = residual_zero; ret = ret ? 
ret : grb::dot( norm_residual, r, r, cg_opts.ring ); // norm_residual = r' * r; assert( ret == SUCCESS ); @@ -206,10 +184,10 @@ namespace grb { norm_residual = std::sqrt( norm_residual ); // initial norm of residual - out_data.norm_residual = norm_residual; - const ResidualType norm_residual_initial { norm_residual }; - ResidualType old_r_dot_z { 0.0 }, r_dot_z { 0.0 }, beta { 0.0 }; - size_t iter { 0 }; + out_info.norm_residual = norm_residual; + const ResidualType norm_residual_initial = norm_residual; + ResidualType old_r_dot_z = residual_zero, r_dot_z = residual_zero, beta = residual_zero; + size_t iter = 0; grb::utils::Timer timer; @@ -258,7 +236,7 @@ namespace grb { assert( ret == SUCCESS ); beta = r_dot_z / old_r_dot_z; - ret = ret ? ret : grb::clear( Ap ); // Ap = 0; + ret = ret ? ret : grb::set( Ap, io_zero ); // Ap = 0; ret = ret ? ret : grb::eWiseMulAdd( Ap, beta, p, z, cg_opts.ring ); // Ap += beta * p + z; std::swap( Ap, p ); // p = Ap; assert( ret == SUCCESS ); @@ -267,7 +245,7 @@ namespace grb { DBG_print_norm( p, "middle p" ); #endif - ret = ret ? ret : grb::set( Ap, 0 ); + ret = ret ? ret : grb::set( Ap, io_zero ); ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, cg_opts.ring ); // Ap = A * p; assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS @@ -302,17 +280,31 @@ namespace grb { } ++iter; - out_data.iterations = iter; - out_data.norm_residual = norm_residual; + out_info.iterations = iter; + out_info.norm_residual = norm_residual; } while( iter < cg_opts.max_iterations && norm_residual / norm_residual_initial > cg_opts.tolerance && ret == SUCCESS ); return ret; } - - - + /** + * Runner object incapsulating all information to run a Conjugate Gradient solver + * with multi-grid. + * + * The multi-grid runner must be constructed separately (depending on the chosen algorithm) + * and move-transfered during construction of this runner. 
+ * The \p MultiGridrunnerType must implement a functional interface whose input (from CG) + * is the structure with the system information for one level of the grid. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam InputType type of values of the right-hand side vector b + * @tparam ResidualType type of the residual norm + * @tparam MultiGridrunnerType type for the multi-grid runner object + * @tparam Ring algebraic ring type + * @tparam Minus minus operator + */ template< typename IOType, typename NonzeroType, @@ -321,35 +313,47 @@ namespace grb { typename MultiGridRunnerType, class Ring, class Minus + > struct MultiGridCGRunner { - > struct mg_cg_runner { - - using HPCGInputType = mg_cg_data< IOType, NonzeroType, InputType >; + using HPCGInputType = MultiGridCGData< IOType, NonzeroType, InputType >; static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); static_assert( std::is_default_constructible< Minus >::value, "cannot construct the Minus operator with default values" ); - // static_assert( std::is_copy_constructible< MultiGridRunnerType >::value, - // "cannot construct the Multi-Grid runner by copy" ); static_assert( std::is_move_constructible< MultiGridRunnerType >::value, "cannot construct the Multi-Grid runner by move" ); // default value: override with your own - cg_options< IOType, ResidualType, Ring, Minus > cg_opts{ true, 10, 0.0, false, Ring(), Minus() }; + CGOptions< IOType, ResidualType, Ring, Minus > cg_opts = { true, 10, + Ring(). template getZero< ResidualType >(), false, Ring(), Minus() }; MultiGridRunnerType mg_runner; - mg_cg_runner( + /** + * Construct a new MultiGridCGRunner object by moving the required MG runner. + * + * Moving the state of the MG is safer in that it avoids use-after-free issues, + * as the state of the MG runner is managed automatically with this object. 
+ */ + MultiGridCGRunner( MultiGridRunnerType &&_mg_runner ) : mg_runner( std::move( _mg_runner ) ) {} + /** + * Functional operator to invoke a full CG-MG computation. + * + * @param grid_base base level of the grid + * @param cg_data data for CG + * @param out_info output information from CG + * @return grb::RC indicating the success or the error occurred + */ inline grb::RC operator()( typename MultiGridRunnerType::MultiGridInputType &grid_base, - mg_cg_data< IOType, NonzeroType, InputType > &data, - cg_out_data< ResidualType > &out_data + MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, + CGOutInfo< ResidualType > &out_info ) { - return mg_cg( grid_base, data, cg_opts, mg_runner, out_data ); + return multigrid_conjugate_gradient( cg_data, cg_opts, grid_base, mg_runner, out_info ); } }; diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp index e76063aec..6462e4019 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_data.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,9 @@ */ /** - * @file hpcg_data.hpp + * @file multigrid_data.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Data structures to store HPCG input/output data. - * @date 2021-04-30 + * Data structure definition to store the information of a single multi-grid level. */ #ifndef _H_GRB_ALGORITHMS_HPCG_DATA @@ -36,46 +35,33 @@ namespace grb { namespace algorithms { /** - * @brief Data container for all multi-grid inputs and outputs. + * This data structure stores information for a \b single multi-grid level. This information + * dependes exclusively on the size of the underlying physical system. 
+ + * + * Internal ALP/GraphBLAS containers are initialized to the proper size, + * but their values are \b not initialized as this depends on the specific algorithm chosen + * for the multi-grid solver. Populating them is user's task. * * @tparam IOType Type of values of the vectors for intermediate results * @tparam NonzeroType Type of the values stored inside the system matrix \p A * and the coarsening matrix #Ax_finer - * - * This data structure stores information for a full multi-grid V cycle, i.e. - * - input and output vectors for solution, residual and temporary vectors - * - coarsening information, in particular the #coarsening_matrix that - * coarsens a larger system of size #finer_size to the current system - * of size #system_size - * - the next level of coarsening, pointed to by #coarser_level, possibly being \c nullptr - * if no further coarsening is desired; note that this information is automatically - * destructed on object destruction (if any) - * - * Vectors stored here refer to the \b coarsened system (with the exception of #Ax_finer), - * thus having size #system_size; this also holds for the system matrix #A, - * while #coarsening_matrix has size #system_size \f$ \times \f$ #finer_size. - * Hence, the typical usage of this data structure is to coarsen \b external vectors, e.g. vectors - * coming from another \code multigrid_data \endcode object whose #system_size equals - * \code this-> \endcode #fines_size, via \code this-> \endcode #coarsening_matrix and store the coarsened - * vectors internally. Mimicing the recursive behavior of standard multi-grid simulations, - * the information for a further coarsening is stored inside #coarser_level, so that the - * hierarchy of coarsened levels is reflected inside this data structure. - * - * As for \ref system_data, internal vectors and matrices are initialized to the proper size, - * but their values are \b not initialized. 
*/ template< typename IOType, typename NonzeroType - > struct multigrid_data { - - const size_t level; - const size_t system_size; ///< size of the system, i.e. side of the #A - grb::Matrix< NonzeroType > A; ///< system matrix - grb::Vector< IOType > z; ///< multi-grid solution - grb::Vector< IOType > r; ///< residual - - multigrid_data( + > struct MultiGridData { + + const size_t level; ///< level of the grid (0 for the finest physical system) + const size_t system_size; ///< size of the system, i.e. side of the #A system matrix + grb::Matrix< NonzeroType > A; ///< system matrix + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual + + /** + * Construct a new multigrid data object from level information and system size. + */ + MultiGridData( size_t _level, size_t sys_size ) : @@ -86,13 +72,13 @@ namespace grb { r( sys_size ) {} // for safety, disable copy semantics - multigrid_data( const multigrid_data< IOType, NonzeroType > & o ) = delete; + MultiGridData( const MultiGridData< IOType, NonzeroType > & o ) = delete; - multigrid_data & operator=( const multigrid_data< IOType, NonzeroType > & ) = delete; + MultiGridData & operator=( const MultiGridData< IOType, NonzeroType > & ) = delete; - grb::RC zero_temp_vectors() { - grb::RC rc = grb::set( z, 0 ); - rc = rc ? rc : grb::set( r, 0 ); + grb::RC init_vectors( IOType zero ) { + grb::RC rc = grb::set( z, zero ); + rc = rc ? rc : grb::set( r, zero ); return rc; } }; diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 77b785e2d..963da74d5 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,8 @@ /** * @file multigrid_v_cycle.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief This file contains the routines for multi-grid solution refinement, including the main routine + * This file contains the routines for multi-grid solution refinement, including the main routine * and those for coarsening and refinement of the tentative solution. - * @date 2021-04-30 */ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE @@ -33,57 +32,51 @@ #include #include - #include #include "multigrid_data.hpp" namespace grb { namespace algorithms { - /** - * @brief Namespace for interfaces that should not be used outside of the algorithm namespace. - */ - namespace internal { - - - - } // namespace internal /** - * @brief Multi-grid V cycle implementation to refine a given solution. + * Multi-grid V cycle implementation to refine a given solution. * * A full multi-grid run goes through the following steps: - * -# if \p presmoother_steps \f$ > 0 \f$, \p presmoother_steps of the Red-Black Gauss-Seidel smoother are run - * to improve on the initial solution stored into \p data.z - * -# the coarsening of \f$ r - A*z \f$ is computed to find the coarser residual vector - * -# a multi-grid run is recursively performed on the coarser system - * -# the tentative solution from the coarser multi-grid run is prolonged and added to the current tentative solution - * into \p data.z - * -# this solution is further smoothed for \p postsmoother_steps steps * - * If coarsening information is not available, the multi-grid run consists in a single smmothing run. + * 1. calls the pre-smoother to improve on the initial solution stored into \p mgiter_begin->z + * 2. coarsens the residual vector + * 3. recursively solves the coarser system + * 4. prolongs the coarser solution into the \p mgiter_begin->z + * 5. 
further smooths the solution wih a post-smoother call + * + * The algorithm moves across grid levels via the STL-like iterators \p mgiter_begin + * and \p mgiter_end and accesses the grid data via the former (using the operator \c * ): when + * \p mgiter_begin \c == \p mgiter_end , a smoothing round is invoked and the recursion halted. * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. + * Failuers of GraphBLAS operations are handled by immediately stopping the execution + * and returning the failure code. * * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values + * @tparam MGSysIterType type of the iterator across grid levels + * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various + * smoothing steps + * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening + * and prolongation * @tparam Ring the ring of algebraic operators zero-values * @tparam Minus the minus operator for subtractions * - * @param[in,out] data \ref multigrid_data object storing the relevant data for the multi-grid run of the current - * clevel - * @param[in,out] coarsening_data pointer to information for the coarsening/refinement operations and for the - * recursive multi-grid run on the coarsened system; if \c nullptr, no coarsening/refinement occurs - * and only smoothing occurs on the current solution - * @param[in] presmoother_steps number of pre-smoother steps - * @param[in] postsmoother_steps number of post-smoother steps - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise + * @param mgiter_begin iterator pointing to the current level of the multi-grid + * 
@param mgiter_end end iterator, indicating the end of the recursion + * @param smoother callable object to invoke the smoothing steps + * @param coarsener callable object to coarsen and prolong (between current and coarser grid levels) + * @param ring the ring to perform the operations on + * @param minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise */ - template< + template < typename IOType, typename NonzeroType, typename MGSysIterType, @@ -99,22 +92,21 @@ namespace grb { const Ring &ring, const Minus &minus ) { - static_assert( std::is_base_of< multigrid_data< IOType, NonzeroType >, + static_assert( std::is_base_of< MultiGridData< IOType, NonzeroType >, typename std::decay< decltype( *mgiter_begin ) >::type >::value, "the iterator type MGSysIterType" - " must reference an object of type multigrid_data< IOType, NonzeroType >" ); + " must reference an object of type MultiGridData< IOType, NonzeroType >" ); - RC ret { SUCCESS }; + RC ret = SUCCESS; assert( mgiter_begin != mgiter_end ); - multigrid_data< IOType, NonzeroType > &finer_system = *mgiter_begin; + MultiGridData< IOType, NonzeroType > &finer_system = *mgiter_begin; ++mgiter_begin; #ifdef HPCG_PRINT_STEPS DBG_println( "mg BEGINNING {" ); #endif - // clean destination vector - ret = ret ? ret : grb::set( finer_system.z, 0 ); + ret = ret ? ret : grb::set( finer_system.z, ring. template getZero< IOType >() ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( finer_system.r, "initial r" ); #endif @@ -128,7 +120,7 @@ namespace grb { #endif return ret; } - multigrid_data< IOType, NonzeroType > &coarser_system = *mgiter_begin; + MultiGridData< IOType, NonzeroType > &coarser_system = *mgiter_begin; // pre-smoother ret = ret ? 
ret : smoother.pre_smooth( finer_system ); @@ -165,15 +157,31 @@ namespace grb { return ret; } + /** + * Callable object to invoke the V-cycle multi-grid algorithm, which also requires + * a smoother and a coarsener object. + * + * It is built by transferring into it the state of both the smoother and the coarsener, + * in order to avoid use-after-free issues. + * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam MGSysIterType type of the iterator across grid levels + * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various + * smoothing steps + * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening + * and prolongation + * @tparam Ring the ring of algebraic operators and zero values + * @tparam Minus the minus operator for subtractions + */ template< typename IOType, typename NonzeroType, - typename InputType, typename MGSmootherType, typename CoarsenerType, class Ring, class Minus - > struct multigrid_runner { + > struct MultiGridRunner { static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); @@ -184,19 +192,20 @@ namespace grb { static_assert( std::is_move_constructible< CoarsenerType >::value, "CoarsenerType must be move-constructible"); - using MultiGridInputType = multigrid_data< IOType, NonzeroType >; + using MultiGridInputType = MultiGridData< IOType, NonzeroType >; // check the interface between HPCG and MG match static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, MultiGridInputType >::value, "input type of the Smoother kernel must match the input from Multi-Grid" ); - MGSmootherType smoother_runner; - CoarsenerType coarsener_runner; - std::vector< std::unique_ptr< MultiGridInputType > > system_levels; - Ring ring; - Minus minus; + MGSmootherType smoother_runner; ///< object to run the smoother + CoarsenerType 
coarsener_runner; ///< object to run the coarsener + std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) + Ring ring; ///< algebraic ring + Minus minus; ///< minus operator - struct Extractor { + // operator to extract the reference out of an std::unique_ptr object + struct __extractor { MultiGridInputType & operator()( typename std::vector< std::unique_ptr< MultiGridInputType > >::reference &ref ) { @@ -210,23 +219,29 @@ namespace grb { } }; - using UniquePtrExtractor = grb::utils::IteratorValueAdaptor< + using __unique_ptr_extractor = grb::utils::IteratorValueAdaptor< typename std::vector< std::unique_ptr< MultiGridInputType > >::iterator, - Extractor + __extractor >; - - multigrid_runner( + /** + * Construct a new MultiGridRunner object by moving in the state of the pre-built + * smoother and coarsener. + */ + MultiGridRunner( MGSmootherType &&_smoother_runner, CoarsenerType &&_coarsener_runner ) : smoother_runner( std::move( _smoother_runner ) ), coarsener_runner( std::move( _coarsener_runner ) ) {} - inline grb::RC operator()( - MultiGridInputType &system - ) { - return multi_grid< IOType, NonzeroType, UniquePtrExtractor, MGSmootherType, CoarsenerType, Ring, Minus >( - UniquePtrExtractor( system_levels.begin() += system.level ), UniquePtrExtractor( system_levels.end() ), + /** + * Operator to invoke a full multi-grid run starting from the given level. 
+ */ + inline grb::RC operator()( MultiGridInputType &system ) { + return multi_grid< IOType, NonzeroType, __unique_ptr_extractor, + MGSmootherType, CoarsenerType, Ring, Minus >( + __unique_ptr_extractor( system_levels.begin() += system.level ), + __unique_ptr_extractor( system_levels.end() ), smoother_runner, coarsener_runner, ring, minus ); } }; diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 615b4340b..97d0c80e4 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,7 @@ /** * @file red_black_gauss_seidel.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Contains the routines to perform a forward-backward pass of a Red-Black Gauss-Seidel smoother. - * @date 2021-04-30 + * Contains the routines to perform a forward-backward pass of a Red-Black Gauss-Seidel smoother. */ #ifndef _H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL @@ -34,30 +33,36 @@ namespace grb { namespace algorithms { - template< typename IOType > struct smoother_data { + /** + * Data structures to run the RBGS smoother on a single level of the multi-grid. 
+ */ + template< typename IOType > struct SmootherData { - grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A - grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results + grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A + grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results std::vector< grb::Vector< bool > > color_masks; ///< for color masks - smoother_data( size_t sys_size ) : + /** + * Construct a new SmootherData object from the level size. + */ + SmootherData( size_t sys_size ) : A_diagonal( sys_size ), - smoother_temp( sys_size ) { } + smoother_temp( sys_size ) {} // for safety, disable copy semantics - smoother_data( const smoother_data & o ) = delete; + SmootherData( const SmootherData & o ) = delete; - smoother_data & operator=( const smoother_data & ) = delete; + SmootherData & operator=( const SmootherData & ) = delete; - grb::RC zero_temp_vectors() { - return grb::set( smoother_temp, 0 ); + grb::RC init_vectors( IOType zero ) { + return grb::set( smoother_temp, zero ); } }; namespace internal { /** - * @brief Runs a single step of Red-Black Gauss-Seidel for a specific color. + * Runs a single step of Red-Black Gauss-Seidel for a specific color. * * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values @@ -71,7 +76,7 @@ namespace grb { * @param[in] color_mask the mask of colors to filter the rows to smooth * @param[in] ring the ring to perform the operations on * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise + * unsuccessful operation otherwise */ template< typename IOType, @@ -86,8 +91,8 @@ namespace grb { const grb::Vector< bool > & color_mask, const Ring & ring ) { - RC ret { SUCCESS }; - ret = ret ? ret : grb::set( smoother_temp, 0 ); + RC ret = SUCCESS; + ret = ret ? ret : grb::set( smoother_temp, ring. 
template getZero< IOType >() ); // acc_temp[mask] = A[mask] * x[mask] ret = ret ? ret : grb::mxv< grb::descriptors::safe_overlap >( smoother_temp, color_mask, A, x, ring ); @@ -98,37 +103,38 @@ namespace grb { // x[mask] = r[mask] - smoother_temp[mask] + x[mask] .* diagonal[mask] // x[mask] = x[maks] ./ diagonal[mask] ret = ret ? ret : - grb::eWiseLambda( - [ &x, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { - // if the mask was properly initialized, the check on the mask value is unnecessary; - // nonetheless, it is left not to violate the semantics of RBGS in case also the false values - // had been initialized (in which case the check is fundamental); if only true values were initialized, - // we expect CPU branch prediction to neutralize the branch cost - // if( color_mask[ i ] ) { - IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; - x[ i ] = v / d; - // } - }, - color_mask, x, r, smoother_temp, A_diagonal ); + grb::eWiseLambda( + [ &x, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { + // if the mask was properly initialized, the check on the mask value is unnecessary; + // nonetheless, it is left not to violate the semantics of RBGS in case also the false values + // had been initialized (in which case the check is fundamental); if only true values were initialized, + // we expect CPU branch prediction to neutralize the branch cost + // if( color_mask[ i ] ) { + IOType d = A_diagonal[ i ]; + IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; + x[ i ] = v / d; + // } + }, + color_mask, x, r, smoother_temp, A_diagonal ); assert( ret == SUCCESS ); return ret; } /** - * @brief Runs a single forward and backward pass of Red-Black Gauss-Seidel smoothing on the system stored in \p data. + * Runs a single forward and backward pass of Red-Black Gauss-Seidel smoothing + * on the system stored in \p data. 
* - * This routine performs a forward and a backward step of Red-Black Gauss-Seidel for each color stored in \p data.color_masks. - * Color stored inside this container are assumed to be mutually exclusive and to cover all rows of the solution vector<\b>, - * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic to generate and pass correct - * coloring information. Otherwise, \b no guarantees hold on the result. + * This routine performs a forward and a backward step of Red-Black Gauss-Seidel for each color + * stored in \p data.color_masks. Colors stored inside this container + * are assumed to be mutually exclusive and to cover all rows of the solution vector<\b>, + * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic + * to pass correct coloring information. Otherwise, \b no guarantees hold on the result. * * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators zero-values * - * @param data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, - * residual, system matrix colors, temporary vectors + * @param[in,out] data structure with the data of a single grid level * @param[in] ring the ring to perform the operations on * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise @@ -138,22 +144,22 @@ namespace grb { typename NonzeroType, class Ring > grb::RC red_black_gauss_seidel( - multigrid_data< IOType, NonzeroType > &data, - smoother_data< IOType > &smoothing_info, + MultiGridData< IOType, NonzeroType > &data, + SmootherData< IOType > &smoothing_info, const Ring & ring ) { - RC ret { SUCCESS }; + RC ret = SUCCESS; // forward step - std::vector< grb::Vector< bool > >::const_iterator end { smoothing_info.color_masks.cend() }; - for( std::vector< 
grb::Vector< bool > >::const_iterator it { - smoothing_info.color_masks.cbegin() }; it != end && ret == SUCCESS; ++it ) { + using cit_t = typename std::vector< grb::Vector< bool > >::const_iterator; + cit_t end = smoothing_info.color_masks.cend(); + for( cit_t it = smoothing_info.color_masks.cbegin(); it != end && ret == SUCCESS; ++it ) { ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, smoothing_info.smoother_temp, *it, ring ); } // backward step - std::vector< grb::Vector< bool > >::const_reverse_iterator rend { smoothing_info.color_masks.crend() }; - for( std::vector< grb::Vector< bool > >::const_reverse_iterator rit { - smoothing_info.color_masks.crbegin() }; rit != rend && ret == SUCCESS; ++rit ) { + using crit_t = typename std::vector< grb::Vector< bool > >::const_reverse_iterator; + crit_t rend = smoothing_info.color_masks.crend(); + for( crit_t rit = smoothing_info.color_masks.crbegin(); rit != rend && ret == SUCCESS; ++rit ) { ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, smoothing_info.smoother_temp, *rit, ring ); } @@ -162,64 +168,62 @@ namespace grb { } // namespace internal + /** + * Runner object for the RBGS smoother, with multiple methods for each type of smoothing step: + * pre-, post- and non-recursive, as invoked during a full run of a multi-grid V-cycle. + * + * It stores the information to smooth each level of the grid, to be initalized separately. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators + */ template < typename IOType, typename NonzeroType, class Ring - > struct red_black_smoother_runner { - size_t presmoother_steps ; - size_t postsmoother_steps; - size_t non_recursive_smooth_steps; - std::vector< std::unique_ptr< smoother_data< IOType > > > levels; - Ring ring; + > struct RedBlackGSSmootherRunner { + + size_t presmoother_steps; ///< number of pre-smoother steps + size_t postsmoother_steps; ///< number of post-smoother steps + size_t non_recursive_smooth_steps; ///< number of smoother steps for the last grid level + std::vector< std::unique_ptr< SmootherData< IOType > > > levels; ///< for each grid level, + ///< the smoothing data (finest first) + Ring ring; ///< the algebraic ring static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring operator with default values" ); - using SmootherInputType = multigrid_data< IOType, NonzeroType >; + using SmootherInputType = MultiGridData< IOType, NonzeroType >; - inline grb::RC pre_smooth( - SmootherInputType& data - ) { - return run_smoother( data, presmoother_steps ); + inline grb::RC pre_smooth( SmootherInputType& data ) { + return __run_smoother( data, presmoother_steps ); } - inline grb::RC post_smooth( - SmootherInputType& data - ) { - return run_smoother( data, postsmoother_steps ); + inline grb::RC post_smooth( SmootherInputType& data ) { + return __run_smoother( data, postsmoother_steps ); } - inline grb::RC nonrecursive_smooth( - SmootherInputType& data - ) { - return run_smoother( data, non_recursive_smooth_steps ); + inline grb::RC nonrecursive_smooth( SmootherInputType& data ) { + return __run_smoother( data, non_recursive_smooth_steps ); } /** - * @brief Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, with inputs and outputs stored - * inside \p data. 
+ * Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, + * with inputs and outputs stored inside \p data. * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[in,out] data \ref system_data data structure with relevant inpus and outputs: system matrix, initial solution, - * residual, system matrix colors, temporary vectors - * @param[in] smoother_steps how many smoothing steps to run - * @param[in] ring the ring to perform the operations on - * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise + * This is an internal method called by all user-facing methods, because this specific + * smoother performs all smoothing steps the same way. */ - grb::RC run_smoother( + grb::RC __run_smoother( SmootherInputType &data, const size_t smoother_steps ) { - RC ret { SUCCESS }; + RC ret = SUCCESS; - smoother_data< IOType > &smoothing_info = *( levels.at( data.level ).get() ); + SmootherData< IOType > &smoothing_info = *( levels.at( data.level ).get() ); - for( size_t i { 0 }; i < smoother_steps && ret == SUCCESS; i++ ) { + for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { ret = ret ? 
ret : internal::red_black_gauss_seidel( data, smoothing_info, ring ); assert( ret == SUCCESS ); } diff --git a/include/graphblas/algorithms/multigrid/coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp similarity index 70% rename from include/graphblas/algorithms/multigrid/coarsener.hpp rename to include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index 47116c22a..e1ef7db73 100644 --- a/include/graphblas/algorithms/multigrid/coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -16,14 +16,13 @@ */ /** - * @file hpcg_data.hpp + * @file single_matrix_coarsener.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Implementation of the coarsener of HPCG - * @date 2022-11-08 + * Implementation of a coarsener using the same matrix for both coarsening and prolongation. */ -#ifndef _H_GRB_ALGORITHMS_HPCG_COARSENER -#define _H_GRB_ALGORITHMS_HPCG_COARSENER +#ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER +#define _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER #include #include @@ -35,37 +34,40 @@ namespace grb { namespace algorithms { + /** + * Structure storing the data for the coarsener + */ template< typename IOType, typename NonzeroType - > - struct coarsening_data { + > struct CoarseningData { grb::Matrix< NonzeroType > coarsening_matrix; ///< matrix of size #system_size \f$ \times \f$ #finer_size ///< to coarsen an input vector of size #finer_size into a vector of size #system_size grb::Vector< IOType > Ax_finer; ///< finer vector for intermediate computations, of size #finer_size /** - * @brief Construct a new \c coarsening_data by initializing internal data structures - * @param[in] coarser_size size of the current system, i.e. size \b after coarsening + * Construct a new CoarseningData object by initializing internal data structures. + * * @param[in] _finer_size size of the finer system, i.e. 
size of external objects \b before coarsening + * @param[in] coarser_size size of the current system, i.e. size \b after coarsening */ - coarsening_data( size_t _finer_size, size_t coarser_size ) : + CoarseningData( size_t _finer_size, size_t coarser_size ) : coarsening_matrix( coarser_size, _finer_size ), Ax_finer( _finer_size ) {} - grb::RC zero_temp_vectors() { - return grb::set( Ax_finer, 0 ); + grb::RC init_vectors( IOType zero ) { + return grb::set( Ax_finer, zero ); } }; namespace internal { /** - * @brief computes the coarser residual vector \p coarsening_data.r by coarsening + * computes the coarser residual vector \p CoarseningData.r by coarsening * \p coarsening_data.Ax_finer - \p r_fine via \p coarsening_data.coarsening_matrix. * - * The coarsening information are stored inside \p coarsening_data. + * The coarsening information are stored inside \p CoarseningData. * * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values @@ -73,7 +75,7 @@ namespace grb { * @tparam Minus the minus operator for subtractions * * @param[in] r_fine fine residual vector - * @param[in,out] coarsening_data \ref multigrid_data data structure storing the information for coarsening + * @param[in,out] coarsening_data \ref MultiGridData data structure storing the information for coarsening * @param[in] ring the ring to perform the operations on * @param[in] minus the \f$ - \f$ operator for vector subtractions * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first @@ -87,28 +89,27 @@ namespace grb { > grb::RC compute_coarsening( const grb::Vector< IOType > & r_fine, // fine residual grb::Vector< IOType > & r_coarse, // fine residual - coarsening_data< IOType, NonzeroType > & coarsening_data, + CoarseningData< IOType, NonzeroType > & coarsening_data, const Ring & ring, const Minus & minus ) { - RC ret { SUCCESS }; + RC ret = SUCCESS; // DBG_print_norm( 
coarsening_data.Ax_finer, "+++ Ax_finer prima" ); - ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, coarsening_data.Ax_finer, - minus ); // Ax_finer = r_fine - Ax_finer + ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, + coarsening_data.Ax_finer, minus ); // Ax_finer = r_fine - Ax_finer // DBG_print_norm( coarsening_data.Ax_finer, "+++ Ax_finer dopo" ); assert( ret == SUCCESS ); // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 // to *coarsening_data->system_size - ret = ret ? ret : grb::set( r_coarse, 0 ); + ret = ret ? ret : grb::set( r_coarse, ring.template getZero< IOType >() ); ret = ret ? ret : grb::mxv< grb::descriptors::dense >( r_coarse, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, ring ); // r = coarsening_matrix * Ax_finer - // DBG_print_norm( r_coarse, "+++ r_coarse" ); return ret; } /** - * @brief computes the prolongation of the coarser solution \p coarsening_data.z and stores it into + * computes the prolongation of the coarser solution \p coarsening_data.z and stores it into * \p x_fine. * * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. @@ -130,10 +131,10 @@ namespace grb { > grb::RC compute_prolongation( const grb::Vector< IOType > & z_coarse, grb::Vector< IOType > & x_fine, // fine residual - grb::algorithms::coarsening_data< IOType, NonzeroType > & coarsening_data, + grb::algorithms::CoarseningData< IOType, NonzeroType > & coarsening_data, const Ring & ring ) { - RC ret { SUCCESS }; + RC ret = SUCCESS; // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 // to nrows(x_fine) ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); @@ -149,40 +150,55 @@ namespace grb { } // namespace internal + /** + * Runner structure, holding the data to coarsen the levels of a multi-grid simulation. 
+ * + * This coarsener just uses the same matrix to perform the coarsening (via an mxv()) + * and the prolongation, using it transposed. + */ template< typename IOType, typename NonzeroType, class Ring, class Minus - > struct single_point_coarsener { + > struct SingleMatrixCoarsener { static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); static_assert( std::is_default_constructible< Minus >::value, "cannot construct the Minus operator with default values" ); - using MultiGridInputType = multigrid_data< IOType, NonzeroType >; + using MultiGridInputType = MultiGridData< IOType, NonzeroType >; - // default value: override with your own - std::vector< std::unique_ptr< grb::algorithms::coarsening_data< IOType, NonzeroType > > > coarsener_levels; + /** + * Data to coarsen each level, from finer to coarser. + */ + std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, + NonzeroType > > > coarsener_levels; Ring ring; Minus minus; - - // single_point_coarsener() = default; - + /** + * Method required by MultiGridRunner before the recursive call, to coarsen + * the residual vector of \p finer (the finer system) into the residual of + * \p coarser (the coarser system). + */ inline grb::RC coarsen_residual( const MultiGridInputType &finer, MultiGridInputType &coarser ) { // first compute the residual - coarsening_data< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; - grb::RC ret = grb::set( coarsener.Ax_finer, 0 ); + CoarseningData< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; + grb::RC ret = grb::set( coarsener.Ax_finer, ring. template getZero< IOType >() ); ret = ret ? 
ret : grb::mxv< grb::descriptors::dense >( coarsener.Ax_finer, finer.A, finer.z, ring ); - // DBG_print_norm( coarsener.Ax_finer, "temp Axf" ); + return internal::compute_coarsening( finer.r, coarser.r, coarsener, ring, minus ); } + /** + * Method required by MultiGridRunner after the recursive call, to "prolong" the coarser solution + * into the finer solution. + */ inline grb::RC prolong_solution( const MultiGridInputType &coarser, MultiGridInputType &finer @@ -194,4 +210,4 @@ namespace grb { } // namespace algorithms } // namespace grb -#endif // _H_GRB_ALGORITHMS_HPCG_COARSENER +#endif // _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER diff --git a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp index 81864cb20..bca870af8 100644 --- a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp +++ b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp @@ -15,11 +15,17 @@ * limitations under the License. */ +/** + * @dir include/graphblas/utils/iterators + * Various utilities to work with STL-like iterators and ALP/GraphBLAS iterators: + * adaptors, partitioning facilities, traits and functions to check compile-time + * and runtime properties. + */ + /** * @file IteratorValueAdaptor.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Adaptor to extract a given value out of an iterator. - * @date 2022-10-08 + * Definition of an adaptor to extract a given value out of an iterator. 
*/ #ifndef H_GRB_UTILS_ITERATOR_VALUE_ADAPTOR @@ -45,9 +51,10 @@ namespace grb { typename AdaptorType > struct IteratorValueAdaptor { - static_assert( std::is_default_constructible< AdaptorType >::value, "RefType must be default-constructible" ); - static_assert( std::is_copy_constructible< AdaptorType >::value, "RefType must be copy-constructible" ); - static_assert( std::is_copy_assignable< AdaptorType >::value, "RefType must be copy-assignable" ); + static_assert( std::is_copy_constructible< AdaptorType >::value, + "AdaptorType must be copy-constructible" ); + static_assert( std::is_copy_assignable< AdaptorType >::value, + "AdaptorType must be copy-assignable" ); typedef decltype( std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) ) reference; typedef typename std::decay< reference >::type value_type; @@ -65,31 +72,51 @@ namespace grb { using SelfType = IteratorValueAdaptor< InnerIterType, AdaptorType >; /** - * Construct a new Iterator Value Adaptor object fro an actual iterator. + * Construct a new IteratorValueAdaptor object from an actual iterator. * The adaptor is built via its default constructor. - * - * @param _iter the underlying iterator, to be copied + */ + IteratorValueAdaptor( typename std::enable_if< std::is_default_constructible< AdaptorType >::value, + const InnerIterType & >::type _iter ) : + iter( _iter ), + adaptor() {} + + /** + * Construct a new IteratorValueAdaptor object from an iterator and an existing adaptor object. */ IteratorValueAdaptor( - const InnerIterType &_iter + const InnerIterType &_iter, + const AdaptorType &_adaptor ) : iter( _iter ), - adaptor() {} + adaptor( _adaptor ) {} /** - * Construct a new Iterator Value Adaptor object fro an actual iterator. + * Construct a new Iterator Value Adaptor object from an actual iterator. * The adaptor is built via its default constructor. 
* * @param _iter the underlying iterator, to be moved */ IteratorValueAdaptor( - InnerIterType &&_iter + typename std::enable_if< std::is_default_constructible< AdaptorType >::value, + InnerIterType && >::type _iter ) : iter( std::move( _iter ) ), adaptor() {} + /** + * Construct a new IteratorValueAdaptor object from an actual iterator + * and an existing adaptor object by moving their state. + */ + IteratorValueAdaptor( + InnerIterType &&_iter, + AdaptorType &&_adaptor + ) : + iter( std::move( _iter ) ), + adaptor( std::move( _adaptor ) ) {} + IteratorValueAdaptor() = delete; + // since it is an iterator, we MUST have copy and move semantics IteratorValueAdaptor( const SelfType & ) = default; IteratorValueAdaptor( SelfType && ) = default; @@ -112,12 +139,18 @@ namespace grb { SelfType& operator++() { ++iter; return *this; } - SelfType & operator+=( typename std::enable_if< is_random_access, const size_t >::type offset ) { + SelfType & operator+=( + typename std::enable_if< is_random_access, + const size_t >::type offset + ) { iter += offset; return *this; } - difference_type operator-( typename std::enable_if< is_random_access, const SelfType & >::type other ) { + difference_type operator-( + typename std::enable_if< is_random_access, + const SelfType & >::type other + ) { return iter - other.iter; } }; diff --git a/include/graphblas/utils/iterators/partition_range.hpp b/include/graphblas/utils/iterators/partition_range.hpp index dd5f397c4..60d228b3a 100644 --- a/include/graphblas/utils/iterators/partition_range.hpp +++ b/include/graphblas/utils/iterators/partition_range.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @file partition_range.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of utilities to partition iterators across processes. 
+ */ + #include #include #include @@ -25,44 +31,73 @@ namespace grb { namespace utils { - template< typename T > void partition_nonzeroes( + /** + * Partitions the size of a collection across processes and computes the first offset + * and the end offset (one past the last element) of the local partition. + * + * @tparam T size type + * @param[in] num_procs total number of processes + * @param[in] this_proc ID of current process + * @param[in] num_elements total number of elements in the collection + * @param[out] first_offset offset to the first element of the local partition + * @param[out] local_size end offset of the local partition (one past its last element), not its length + */ + template< typename T > void partition_collection_size( size_t num_procs, size_t this_proc, T num_elements, T& first_offset, - T& last_offset + T& local_size ) { - const T per_process{ ( num_elements + num_procs - 1 ) / num_procs }; // round up + const T per_process = ( num_elements + num_procs - 1 ) / num_procs; // round up first_offset = std::min( per_process * static_cast< T >( this_proc ), num_elements ); - last_offset = std::min( first_offset + per_process, num_elements ); + local_size = std::min( first_offset + per_process, num_elements ); } + /** + * Partitions an iteration range across processes according to the given information. + * + * With \p num_procs processes and \p this_proc < \p num_procs and a collection of \p num_elements + * elements across all processes, it partitions the collection evenly among processes and sets + * \p begin and \p end so that they iterate over the local partition designated by \p this_proc. + * + * It works also for a single-process scenario. + * + * Note: the number of processes and the ID of the current process is expected in input + * not to introduce dependencies on separate code paths.
+ * + * @tparam IterT iterator type + * @param[in] num_procs number of processes + * @param[in] this_proc Id of current process + * @param[in] num_elements number of elements of the collection; it can be computed as + * \code std::distance( begin, end ) \endcode + * @param[out] begin beginning iterator to the whole collection + * @param[out] end end iterator + */ template< typename IterT > void partition_iteration_range_on_procs( size_t num_procs, size_t this_proc, - size_t num_nonzeroes, + size_t num_elements, IterT &begin, IterT &end ) { static_assert( std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< IterT >::iterator_category >::value, "the given iterator is not a random access one" ); - assert( num_nonzeroes == static_cast< size_t >( end - begin ) ); - size_t first, last; - partition_nonzeroes( num_procs, this_proc, num_nonzeroes, first, last ); - if( last < num_nonzeroes ) { + assert( this_proc < num_procs ); + assert( num_elements == static_cast< size_t >( end - begin ) ); + if( num_procs == 1 ) { + return; + } + size_t first, num_local_elements; + partition_collection_size( num_procs, this_proc, num_elements, first, num_local_elements ); + if( num_local_elements < num_elements ) { end = begin; - end += last; + end += num_local_elements; + } + if( first > 0 ) { + begin += first; } - begin += first; - } - - template< typename IterT > void partition_iteration_range_on_procs( - size_t num_nonzeroes, - IterT &begin, - IterT &end - ) { - return partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), num_nonzeroes, begin, end ); } } // namespace utils diff --git a/include/graphblas/utils/iterators/utils.hpp b/include/graphblas/utils/iterators/utils.hpp index b56899c83..0b635578d 100644 --- a/include/graphblas/utils/iterators/utils.hpp +++ b/include/graphblas/utils/iterators/utils.hpp @@ -25,6 +25,8 @@ #define _H_GRB_ITERATOR_UTILS #include +#include +#include #include #include @@ -78,6 +80,28 @@ namespace grb { return 
SUCCESS; } + /** + * Computes the difference between \p a \a - \p b and returns it as the given + * type \p DiffType. + * + * Raises an exception if \p DiffType cannot store the difference. + */ + template< + typename DiffType, + typename SizeType + > DiffType compute_signed_distance( + const SizeType a, + const SizeType b + ) { + static_assert( std::is_signed< DiffType >::value, "DiffType should be signed" ); + const SizeType diff = std::max( a, b ) - std::min( a, b ); + if( diff > static_cast< SizeType >( std::numeric_limits< DiffType >::max() ) ) { + throw std::range_error( "cannot represent difference" ); + } + DiffType result = static_cast< DiffType >( diff ); + return a >= b ? result : -result ; + } + } // end namespace utils } // end namespace grb diff --git a/include/graphblas/utils/multigrid/array_vector_storage.hpp b/include/graphblas/utils/multigrid/array_vector_storage.hpp index 8eb1e4377..a40850f77 100644 --- a/include/graphblas/utils/multigrid/array_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/array_vector_storage.hpp @@ -19,9 +19,7 @@ * @file array_vector_storage.cpp * @author Alberto Scolari (alberto.scolari@huawei.com) * Extension of std::array<> exposing a larger interface and the underlying - * storage structure. - * - * @date 2022-10-24 + * storage structure. */ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE @@ -38,12 +36,12 @@ namespace grb { /** * Array with fixed size based on std::array with an interface compliant to what other classes - * in the geometry namespace expect, like storage() and dimensions() methods. + * in the geometry namespace expect, like #storage() and #dimensions() methods. * * It describes a vector of dimensions #dimensions(). 
* - * @tparam DataType the data type of the vector elements * @tparam DIMS the dimensions of the vector + * @tparam DataType the data type of the vector elements */ template< size_t DIMS, @@ -55,6 +53,12 @@ namespace grb { using ConstVectorStorageType = const std::array< DataType, DIMS >&; using SelfType = ArrayVectorStorage< DIMS, DataType >; + /** + * Construct a new Array Vector Storage object of given dimensions; + * internal values are \b not initialized. + * + * \p _dimensions must be equal to \p DIMS, or an exception is thrown. + */ ArrayVectorStorage( size_t _dimensions ) { static_assert( DIMS > 0, "cannot allocate 0-sized array" ); if( _dimensions != DIMS ) { @@ -71,23 +75,31 @@ namespace grb { ArrayVectorStorage( SelfType &&o ) = delete; - SelfType& operator=( - const SelfType &original - ) noexcept { + SelfType& operator=( const SelfType &original ) noexcept { std::copy_n( original.begin(), DIMS, this->begin() ); return *this; } SelfType & operator=( SelfType &&original ) = delete; + /** + * Returns the geometrical dimensions of this vector, i.e. of the + * geometrical space it refers to. + */ constexpr size_t dimensions() const { return DIMS; } + /** + * Returns a reference to the underlying storage object. + */ inline VectorStorageType storage() { return *this; } + /** + * Returns a const reference to the underlying storage object. + */ inline ConstVectorStorageType storage() const { return *this; } diff --git a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp index 9168f175c..9e5b7f92e 100644 --- a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @file dynamic_vector_storage.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Extension of a heap-allocated array exposing the underlying storage and iterators. 
+ */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE #define _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE @@ -22,14 +28,6 @@ #include #include -/** - * @file dynamic_vector_storage.cpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * Extension of a heap-allocated array exposing the underlying storage and iterators. - * - * @date 2022-10-24 - */ - namespace grb { namespace utils { namespace multigrid { diff --git a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp index 2bd82ff35..2404cdf00 100644 --- a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp +++ b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp @@ -1,18 +1,67 @@ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @dir include/graphblas/utils/multigrid + * This folder contains various utilities to describe an N-dimensional mesh (possibly with halo) + * and iterate through its elements and through the neighbors of each element, possible generating + * a matrix out of this information. + * + * These facilities are used to generate system matrices and various inputs for multi-grid simulations. + */ + +/** + * @file halo_matrix_generator_iterator.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of HaloMatrixGeneratorIterator. 
+ */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR #define _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR #include +#include "array_vector_storage.hpp" #include "linearized_halo_ndim_system.hpp" #include "linearized_ndim_system.hpp" #include "linearized_ndim_iterator.hpp" -#include "array_vector_storage.hpp" namespace grb { namespace utils { namespace multigrid { + /** + * Iterator type to generate a matrix on top of the element-neighbor couples of a + * \p DIMS -dimensional mesh. + * + * This iterator is random-access and meets the interface of an ALP/GraphBLAS + * input iterator, i.e. an object of this type \a it has methods \a i(), \a j() and + * \a v() to describe a nonzero triplet (row index, column index and value, respectively). + * + * This data structure is based on the LinearizedHaloNDimIterator class, essentially wrapping the + * underlying element index as \a i() and the neighbor index as \a j(); the value \a v() + * is user-customizable via a functor of type \p ValueCallable, which emits the nonzero + * of type \p ValueType based on the passed values of \a i() and \a j().
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinate and the system sizes along each dimension + * @tparam ValueType type of nonzeroes + * @tparam ValueCallable callable object producing the nonzero value based on \a i() and \a j() + */ template< size_t DIMS, typename CoordType, @@ -55,8 +104,6 @@ namespace grb { } private: - // ValueType diagonal_value; ///< value to be emitted when the object has moved to the diagonal - // ValueType non_diagonal_value; ///< value to emit outside of the diagonal ValueCallable _value_producer; RowIndexType _i; ColumnIndexType _j; @@ -70,7 +117,7 @@ namespace grb { using difference_type = typename Iterator::difference_type; /** - * @brief Construct a new \c HaloMatrixGeneratorIterator object, setting the current row as \p row + * Construct a new \c HaloMatrixGeneratorIterator object, setting the current row as \p row * and emitting \p diag if the iterator has moved on the diagonal, \p non_diag otherwise. * * @param sizes array with the sizes along the dimensions @@ -94,7 +141,7 @@ namespace grb { SelfType & operator=( const SelfType & ) = default; /** - * @brief Increments the iterator by moving coordinates to the next (row, column) to iterate on. + * Increments the iterator by moving coordinates to the next (row, column) to iterate on. * * This operator internally increments the columns coordinates until wrap-around, when it increments * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate @@ -119,7 +166,7 @@ namespace grb { } /** - * @brief Operator to compare \c this against \p o and return whether they differ. + * Operator to compare \c this against \p o and return whether they differ. * * @param o object to compare \c this against * @return true of the row or the column is different between \p o and \c this @@ -130,7 +177,7 @@ namespace grb { } /** - * @brief Operator to compare \c this against \p o and return whether they are equal.
+ * Operator to compare \c this against \p o and return whether they are equal. * * @param o object to compare \c this against * @return true of the row or the column is different between \p o and \c this @@ -141,7 +188,7 @@ namespace grb { } /** - * @brief Operator returning the triple to directly access row, column and element values. + * Operator returning the triple to directly access row, column and element values. * * Useful when building the matrix by copying the triple of coordinates and value, * like for the BSP1D backend. @@ -155,21 +202,21 @@ namespace grb { } /** - * @brief Returns the current row. + * Returns the current row. */ inline RowIndexType i() const { return _val.i(); } /** - * @brief Returns the current column. + * Returns the current column. */ inline ColumnIndexType j() const { return _val.j(); } /** - * @brief Returns the current matrix value. + * Returns the current matrix value. * * @return ValueType #diagonal_value if \code row == column \endcode (i.e. if \code this-> \endcode * #i() \code == \endcode \code this-> \endcode #j()), #non_diagonal_value otherwise diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp deleted file mode 100644 index 0e53dd671..000000000 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_geometry.hpp +++ /dev/null @@ -1,226 +0,0 @@ - -#ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY -#define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "array_vector_storage.hpp" -#include "dynamic_vector_storage.hpp" -#include "linearized_ndim_system.hpp" -#include "ndim_vector.hpp" - -namespace grb { - namespace utils { - namespace multigrid { - - template< - size_t DIMS, - typename CoordType - > void __compute_neighbors_range( - const ArrayVectorStorage< DIMS, CoordType > &_system_sizes, - const 
CoordType halo, - const ArrayVectorStorage< DIMS, CoordType > &system_coordinates, - ArrayVectorStorage< DIMS, CoordType > &neighbors_start, - ArrayVectorStorage< DIMS, CoordType > &neighbors_range ) { - - for( CoordType i{0}; i < DIMS/* - 1*/; i++ ) { - const CoordType start{ system_coordinates[i] <= halo ? 0 : system_coordinates[i] - halo }; - const CoordType end{ std::min( system_coordinates[i] + halo, _system_sizes[i] - 1 ) }; - neighbors_start[i] = start; - neighbors_range[i] = end - start + 1; - } - } - - template< - size_t DIMS, - typename CoordType - > size_t __neighbour_to_system_coords( - const std::array< CoordType, DIMS > &sizes, - size_t system_size, - const std::vector< NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > > - &dimension_neighbors, - CoordType halo, - CoordType neighbor, - ArrayVectorStorage< DIMS, CoordType > &result - ){ - if( neighbor > system_size ) { - throw std::invalid_argument("neighbor number ( " + std::to_string(neighbor) - + " ) >= system size ( " + std::to_string( system_size ) + " )"); - } - ArrayVectorStorage< DIMS, CoordType > halo_coords( DIMS ); -#ifdef _DEBUG - size_t * const halo_coords_end{ halo_coords.data() + DIMS }; -#endif - std::fill_n( halo_coords.begin(), DIMS, 0 ); - - for( size_t _dim{DIMS}; _dim > 0; _dim--) { - - const size_t dimension{_dim - 1}; - const size_t dimension_size{ sizes[dimension] }; - const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > & neighbors{ dimension_neighbors[dimension] }; - - CoordType * const halo_coords_begin{ halo_coords.data() + dimension }; -#ifdef _DEBUG - std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " << neighbor << std::endl; - std::cout << "\thalo : "; - print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; -#endif - size_t h{0}; - size_t previous_neighs{ 0 }; - *halo_coords_begin = h; - size_t halo_max_neighs{ neighbors.at( halo_coords_begin ) }; - //std::cout << "\tinitial 
halo_max_neighs " << halo_max_neighs << std::endl; - while( h < halo && neighbor >= previous_neighs + halo_max_neighs ) { - h++; - *halo_coords_begin = h; - previous_neighs += halo_max_neighs; - halo_max_neighs = neighbors.at( halo_coords_begin ); - } -#ifdef _DEBUG - std::cout << "- initial halo - neighbour " << neighbor << std::endl; - std::cout << "\th " << h << std::endl; - std::cout << "\thalo : "; - print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; -#endif - if ( h < halo ){ - result[dimension] = h; - neighbor -= previous_neighs; -#ifdef _DEBUG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - continue; - } - // saturation occurred - const size_t distance_from_halo{ ( neighbor - previous_neighs ) / halo_max_neighs }; -#ifdef _DEBUG - std::cout << "- before middle elements - neighbour " << neighbor << std::endl; - std::cout << "\tprevious_neighs " << previous_neighs << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; - std::cout << "\tdistance_from_halo " << distance_from_halo << std::endl; - std::cout << "\tdimension_size " << dimension_size << std::endl; -#endif - if ( distance_from_halo < dimension_size - 2 * halo ) { - result[dimension] = distance_from_halo + halo; - neighbor -= (previous_neighs + distance_from_halo * halo_max_neighs) ; -#ifdef _DEBUG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - continue; - } - previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; -#ifdef _DEBUG - std::cout << "- after middle elements -neighbour " << neighbor << std::endl; - std::cout << "\tprevious_neighs " << previous_neighs << std::endl; - std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; -#endif - h = halo - 1; - *halo_coords_begin = h; - halo_max_neighs = neighbors.at( halo_coords_begin ); - while( h > 0 && neighbor >= previous_neighs + halo_max_neighs ) { - h--; - 
*halo_coords_begin = h; - previous_neighs += halo_max_neighs; - halo_max_neighs = neighbors.at( halo_coords_begin ); - } - neighbor -= previous_neighs; -#ifdef _DEBUG - std::cout << "- final halo - neighbour " << neighbor << std::endl; - std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; -#endif - // ( dimension_size - 1 ) because coordinates are 0-based and neighbor - // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] - result[dimension] = dimension_size - 1 - h; -#ifdef _DEBUG - std::cout << "end neighbour " << neighbor << std::endl; -#endif - } - return neighbor; - } - - - template< typename CoordType > size_t __accumulate_dimension_neighbours( - const NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > > &prev_neighs, - CoordType* coords_buffer, - size_t halo, - size_t local_size - ) { - size_t neighs{0}; - size_t h{0}; - for( ; h < halo && local_size > 1; h++ ) { - *coords_buffer = h; - - const size_t local_neighs{ prev_neighs.at( coords_buffer ) }; - neighs += 2 * local_neighs; // the 2 sides - local_size -= 2; - } - *coords_buffer = h; - neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements - return neighs; - } - - template< typename CoordType > void __populate_halo_neighbors( size_t halo, - NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >& container ) { - - using it_type = typename NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >::DomainIterator; - it_type end{ container.domain_end() }; - for( it_type it{ container.domain_begin() }; it != end; ++it ) { - size_t res{1}; - for( size_t h: it->get_position() ) res *= (h + 1 + halo); - container.at( it->get_position() ) = res; - } - } - - template< - typename CoordType, - size_t DIMS - > size_t __init_halo_search( - typename LinearizedNDimSystem< CoordType, ArrayVectorStorage< DIMS, CoordType > >::ConstVectorReference - sizes, - size_t halo, - std::vector< NDimVector< 
CoordType, CoordType, DynamicVectorStorage< CoordType > > >& dimension_limits - ) { - using nd_vec = NDimVector< CoordType, CoordType, DynamicVectorStorage< CoordType > >; - using nd_vec_iterator = typename nd_vec::DomainIterator; - - std::vector halo_sizes( DIMS, halo + 1); - dimension_limits.emplace_back(halo_sizes); - // initialize values - __populate_halo_neighbors< CoordType >( halo, dimension_limits[0] ); - for( size_t i{1}; i < DIMS; i++ ) { - std::vector halos( DIMS - i, halo + 1 ); - dimension_limits.emplace_back(halos); - } - - std::array< CoordType, DIMS > prev_coords_buffer; // store at most DIMS values - CoordType* const prev_coords{ prev_coords_buffer.data() }; - CoordType* const second{ prev_coords + 1 }; // store previous coordinates from second position - for( size_t dimension{1}; dimension < DIMS; dimension++ ) { - const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; - nd_vec& current_neighs{dimension_limits[dimension]}; - - nd_vec_iterator end{ current_neighs.domain_end() }; - for( nd_vec_iterator it{ current_neighs.domain_begin() }; it != end; ++it ) { - typename nd_vec::ConstDomainVectorReference current_halo_coords{ it->get_position() }; - - std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); - size_t local_size{ sizes[dimension - 1] }; - const size_t neighs{ __accumulate_dimension_neighbours(prev_neighs, prev_coords, halo, local_size) }; - current_neighs.at(current_halo_coords) = neighs; - } - } - return __accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); - } - - } // namespace multigrid - } // namespace utils -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_GEOMETRY diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp index 871d62b7c..62e4dcd4a 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp +++ 
b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp @@ -15,16 +15,23 @@ * limitations under the License. */ +/** + * @file linearized_halo_ndim_iterator.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of LinearizedHaloNDimIterator. + */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR #define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR #include #include -#include #include #include #include +#include + #include "linearized_ndim_system.hpp" #include "array_vector_storage.hpp" #include "linearized_ndim_iterator.hpp" @@ -39,6 +46,60 @@ namespace grb { typename SizeType > class LinearizedHaloNDimSystem; + /** + * Class to iterate over the \b neighbors of a system with halo: by advancing the iterator, + * the user can traverse all neighbors of all elements one-by-one, in order, for example, to + * emit all possible couples element-neighbor. + * + * Example: for a 2-dimensional 3 x 3 system with halo 1, with elements numbered as in + * + * 0 1 2 + * 3 4 5 + * 6 7 8 + * + * the emitted couples are: + * + * 0-0, 0-1, 0-3, 0-4; 1-0, 1-1, 1-2, 1-3, 1-4, 1-5; 2-1, 2-2, 2-4, 2-5; + * 3-0, 3-1, 3-3, 3-4; 4-0, 4-1, 4-2, 4-3, 4-4, 4-5, 4-6, 4-7, 4-8; and so on. + * + * It implements two interfaces for iteration. The first is a standard STL-like + * interface meeting the random-access requirements, with operators \a ++, \a *, \a ->, + * \a +=, \a -, \a ==; these facilities iterate over \b all neighbors of the underlying system, + * automatically updating the corresponding element the neighbor is associated to. + * The second interface is a custom (Java-like) one that allows to iterate separately over elements + * and their neighbors: the user can query whether more elements exist, move to the next element, + * iterate over the neighbors of the current element, query whether more neighbors exist for the + * current element. + * + * The state of this structure essentially contains: + * + * 1.
a const-pointer to a LinearizedHaloNDimSystem object, storing the geometry + * information of the N-dimensional system. + * 2. the iterator to the current element (which in turn provides the element's vector + * and linear coordinates) + * 3. the vector coordinate of the current neighbor + * 4. the linear coordinate of the current neighbor + * 5. information about the current element's neighbors space: + * 1. the N-dimensional sub-space of neighbors w.r.t. the current element: this + * LinearizedHaloNDimSystem object stores the sizes of the neighbors's sub-space + * centered around the current element (at most 2 * halo + 1 per dimension, if the current + * element is an inner one); hence, it computes coordinates and provides iterators that are + * \b relative to the current element + * 2. vector coordinates of the first neighbor of the current element, in the main system + * (i.e. \b not relative); this allows computing any neighbor as the sum of this vector + * plus its relative coordinates in the neighbors' sub-space + * 3. iterator to the current neighbor, built out of the relative sub-space, to actually iterate + * over the current element's neighbors + * 4. iterator to the last neighbor of the current element, to stop the iteration over neighbors + * and advance to the next element. + * + * The above-mentioned methods to advance the iterator \c this (over neighbors or elements) + * take care of updating these structures properly, keeping the state \b always coherent. 
+ * + * @tparam DIMS system number of dimensions + * @tparam SizeType type of coordinates and of sizes (must be large enough to describe the size + * of the system along each direction) + */ template< size_t DIMS, typename SizeType @@ -52,6 +113,11 @@ namespace grb { using ConstVectorReference = typename VectorIteratorType::ConstVectorReference; using SelfType = LinearizedHaloNDimIterator< DIMS, SizeType >; + /** + * Structure holding the information about a neighbor in a system: its linear + * and vector coordinates and the element it is neighbor of (in the form of both + * linear and vector coordinate). + */ struct HaloNDimElement { private: @@ -84,22 +150,37 @@ namespace grb { HaloNDimElement& operator=( const HaloNDimElement& ) = default; + /** + * Get the element as vector coordinates. + */ ConstVectorReference get_element() const { return this->_element_iter->get_position(); } + /** + * Get the element as linear coordinates. + */ size_t get_element_linear() const { return this->_system->ndim_to_linear( this->_element_iter->get_position() ); } + /** + * Get the neighbor as vector coordinates. + */ ConstVectorReference get_neighbor() const { return this->_neighbor; } + /** + * Get the neighbor as linear coordinates. + */ size_t get_neighbor_linear() const { return this->_system->ndim_to_linear( this->_neighbor ); } + /** + * Get the (unique) neighbor number in the system.
+ */ SizeType get_position() const { return this->_position; } @@ -112,54 +193,22 @@ namespace grb { using reference = const HaloNDimElement&; using difference_type = signed long; - private: - HaloNDimElement _point; - LinearizedNDimSystem< SizeType, VectorType > _neighbors_linearizer; - VectorIteratorType _neighbor_iter; // iterator in the sub-space of neighbors (0-based) - VectorType _neighbors_start; - VectorIteratorType _neighbor_end; - - inline void __update_neighbor() { - for( size_t i{0}; i < DIMS; i++ ) { - this->_point._neighbor[i] = this->_neighbors_start[i] + this->_neighbor_iter->get_position()[i]; - } - } - - inline void on_neighbor_iter_update() { - this->__update_neighbor(); - } - - void on_element_update() { - // reset everything - VectorType neighbors_range( DIMS ); - this->_point._system->compute_neighbors_range( - this->_point._element_iter->get_position(), - this->_neighbors_start, - neighbors_range - ); - // re-target _neighbors_linearizer - this->_neighbors_linearizer.retarget( neighbors_range ); - } - - void on_element_advance() { - this->on_element_update(); - - this->_neighbor_iter = VectorIteratorType( this->_neighbors_linearizer ); - this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ); - - this->on_neighbor_iter_update(); - } - - public: - LinearizedHaloNDimIterator() = delete; + /** + * Construct a new LinearizedHaloNDimIterator object from the underlying system + * \p system (whose geometry information is used to iterate). The constructed object + * points to the first neighbor of the first element, i.e. the one with vector coordinates + * \a [0,0,...,0]. + * + * IF \p system is not valid anymore, then also \c this is not. 
+ */ LinearizedHaloNDimIterator( const SystemType& system ) noexcept : _point( system ), - _neighbors_linearizer( DIMS, system.halo() + 1 ), - _neighbor_iter( this->_neighbors_linearizer ), + _neighbors_subspace( DIMS, system.halo() + 1 ), _neighbors_start( DIMS ), - _neighbor_end( VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ) ) + _neighbor_iter( this->_neighbors_subspace ), + _neighbor_end( VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace ) ) { std::fill_n( this->_neighbors_start.begin(), DIMS, 0 ); } @@ -180,30 +229,54 @@ namespace grb { return &(this->_point); } + /** + * Tells whether the current element has more neighbor available (on which the user + * has not iterated yet). + */ bool has_more_neighbours() const { return this->_neighbor_iter != this->_neighbor_end; } + /** + * Moves \c this to point to the next neighbor (if any, exception otherwise). + * + * Does \b not advance the element, which should be done manually via #next_element(). + */ void next_neighbour() { + if( !has_more_neighbours() ) { + throw std::out_of_range("the current element has no more neighbors"); + } ++(this->_neighbor_iter); this->on_neighbor_iter_update(); this->_point._position++; } + /** + * Tells whether the system has more elements. + */ bool has_more_elements() const { return this->_point.get_element_linear() != (this->_point._system)->base_system_size(); } + /** + * Moves \c this to point to the next element, setting the neighbor as the first one. 
+ */ void next_element() { - size_t num_neighbours = this->_neighbors_linearizer.system_size(); + if( !has_more_elements() ) { + throw std::out_of_range("the system has no more elements"); + } + size_t num_neighbours = this->_neighbors_subspace.system_size(); size_t neighbour_position_offset = - this->_neighbors_linearizer.ndim_to_linear( this->_neighbor_iter->get_position() ); + this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); ++(this->_point._element_iter); this->on_element_advance(); this->_point._position -= neighbour_position_offset; this->_point._position += num_neighbours; } + /** + * Moves \c this to point to the next neighbor, also advancing the element if needed. + */ SelfType & operator++() noexcept { ++(this->_neighbor_iter); if( !has_more_neighbours() ) { @@ -217,60 +290,104 @@ namespace grb { return *this; } + /** + * Moves \c this ahead of \p offset neighbors, also advancing the element if necessary. + */ SelfType & operator+=( size_t offset ) { if( offset == 1UL ) { return this->operator++(); } - const size_t final_position { this->_point._position + offset }; + const size_t final_position = this->_point._position + offset; if( final_position > this->_point._system->halo_system_size() ) { throw std::range_error( "neighbor linear value beyond system" ); } VectorType final_element( DIMS ); - size_t neighbor_index{ (this->_point._system->neighbour_linear_to_element( final_position, final_element )) }; + size_t neighbor_index = (this->_point._system->neighbour_linear_to_element( final_position, final_element )); this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); this->_point._position = final_position; this->on_element_update(); - this->_neighbors_linearizer.linear_to_ndim( neighbor_index, final_element ); + this->_neighbors_subspace.linear_to_ndim( neighbor_index, final_element ); - this->_neighbor_iter = VectorIteratorType( this->_neighbors_linearizer,
final_element.cbegin() ); - this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_linearizer ); + this->_neighbor_iter = VectorIteratorType( this->_neighbors_subspace, final_element.cbegin() ); + this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace ); this->on_neighbor_iter_update(); return *this; } + /** + * Returns the difference between \c this and \p other in the linear space of neighbors, + * i.e. how many times \p other must be advanced in order to point to the same neighbor of \c this. + * + * It throws if the result cannot be stored as a difference_type variable. + */ difference_type operator-( const SelfType &other ) const { - /* - if( _point.get_position() < a_point.get_position() ) { - throw std::invalid_argument( "first iterator is in a lower position than second" ); - } - */ - size_t a_pos{ _point.get_position() }, b_pos{ other._point.get_position() }; - // std::cout << "diff " << a_pos << " - " << b_pos << std::endl; - size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; - using diff_t = typename LinearizedHaloNDimIterator< DIMS, SizeType >::difference_type; - - if( highest - lowest > static_cast< size_t >( - std::numeric_limits< diff_t >::max() ) ) { - throw std::invalid_argument( "iterators are too distant" ); - } - - return ( static_cast< diff_t >( a_pos - b_pos ) ); + return grb::utils::compute_signed_distance< difference_type, SizeType >( + _point.get_position(), other._point.get_position() ); } - // implementation depending on logic in operator++ + /** + * Utility to build an iterator to the end of the system \p system. + * + * The implementation depends on the logic of operator++. 
+ */ static SelfType make_system_end_iterator( const SystemType& system ) { SelfType result( system ); - // go to the very first point outside of space result._point._element_iter = VectorIteratorType::make_system_end_iterator( system ); result.on_element_advance(); result._point._position = system.halo_system_size(); - return result; } + + private: + HaloNDimElement _point; + LinearizedNDimSystem< SizeType, VectorType > _neighbors_subspace; + VectorType _neighbors_start; + VectorIteratorType _neighbor_iter; // iterator in the sub-space of neighbors (0-based) + VectorIteratorType _neighbor_end; + + /** + * To be called when the iterator pointing to the neighbor is updated in order to update + * the actual neighbor's coordinates. + */ + inline void on_neighbor_iter_update() { + for( size_t i = 0; i < DIMS; i++ ) { + this->_point._neighbor[i] = this->_neighbors_start[i] + + this->_neighbor_iter->get_position()[i]; + } + } + + /** + * To be called after the iterator pointing to the element is updated in order to + * reset the information about the neighbor. + */ + void on_element_update() { + // reset everything + VectorType neighbors_range( DIMS ); + this->_point._system->compute_neighbors_range( + this->_point._element_iter->get_position(), + this->_neighbors_start, + neighbors_range + ); + // re-target _neighbors_subspace + this->_neighbors_subspace.retarget( neighbors_range ); + } + + /** + * To be called after the iterator pointing to the element is updated in order to update + * all information about the neighbor, like iterator, surrounding halo and coordinates.
+ */ + void on_element_advance() { + this->on_element_update(); + + this->_neighbor_iter = VectorIteratorType( this->_neighbors_subspace ); + this->_neighbor_end = VectorIteratorType::make_system_end_iterator( this->_neighbors_subspace ); + + this->on_neighbor_iter_update(); + } }; } // namespace multigrid diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp index cc84de621..d448fd426 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp @@ -1,4 +1,26 @@ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file linearized_halo_ndim_system.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of LinearizedHaloNDimSystem. 
+ */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM #define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM @@ -7,17 +29,58 @@ #include #include #include +#ifdef _DEBUG +#include +#endif #include "array_vector_storage.hpp" +#include "dynamic_vector_storage.hpp" +#include "ndim_vector.hpp" #include "linearized_ndim_system.hpp" -#include "linearized_halo_ndim_geometry.hpp" #include "linearized_halo_ndim_iterator.hpp" namespace grb { namespace utils { namespace multigrid { - // only with ArrayVectorStorage + /** + * Structure to represent an N-dimensional space (or \a system) of given sizes and to + * iterate on both the \a elements of the N-dimensional system and the N-dimensional + * \a neighbors of each element within a given \p halo. This facility takes into account + * the various cases where the element is at the corner, edge or face of the N-dimensional + * system, to which different neighbors correspond. Both elements and their neighbors are + * vectors in the N-dimensional system and as such described via both N-dimensional coordinates + * and a linear coordinate. + * + * This structure returns the number of elements of the underlying N-dimensional system + * (the \a base system) via #base_system_size() and the total sum of neighbors of all + * system elements via #halo_system_size(). + * + * The peculiar feature of this structure is the method #neighbour_linear_to_element(), to translate + * a neighbor index (i.e. a value from \a 0 to #halo_system_size(), uniquely identifying an element + * as neighbor of an element) to the N-dimensional coordinates of the corresponding elements in a time + * that is constant with respect to the input value (it depends on \p DIMS and the halo size). 
+ * This facility allows the iterators of a LinearizedNDimSystem to be random-access: when advancing + * an iterator by an \a offset via the \a += method, the logic: + * + * - increments the index of the current neighbor (stored inside the iterator) by \a offset, thus + * computing the index of the destination neighbor (constant time) + * - translates the index of the destination neighbor to its base element's coordinates via + * #neighbour_linear_to_element() (constant time) + * + * The same method also returns the index of the destination neighbor within the sub-space of the base + * element's neighbors: hence, the logic can compute in constant time the destination base element + * and its destination neighbor. The constant time of this translation is achieved by pre-computing + * the number of neighbors for each element along each dimension: for example, inner elements in + * a 3D mesh with halo 1 have 27 neighbors. Thus, it suffices in principle to divide the neighbor + * index by 27 to compute the base element of a neighbor. Care must be taken for elements at the + * sides of each dimension: for example, a corner element on a face has 8 neighbors, while a corner + * element in an internal slab (a 2D "plane" in a 3D mesh) has 12 neighbors. The pre-computed + * information and the logic also account for this. + * + * @tparam DIMS number of dimensions of the system + * @tparam SizeType type storing the system sizes and offsets + */ template< size_t DIMS, typename SizeType @@ -30,10 +93,20 @@ namespace grb { using BaseType = LinearizedNDimSystem< SizeType, VectorType >; using Iterator = LinearizedHaloNDimIterator< DIMS, SizeType >; - LinearizedHaloNDimSystem( ConstVectorStorageType sizes, SizeType halo ): + /** + * Construct a new LinearizedHaloNDimSystem object with given sizes and halo. + * + * The size of \p sizes must be exactly \p DIMS. Each size must be so that there is at least + * an element in the system with full halo neighbors, i.e.
for each size \a s + * s >= 2 * halo + 1 (otherwise an exception is thrown). + */ + LinearizedHaloNDimSystem( + ConstVectorStorageType sizes, + SizeType halo + ) : BaseType( sizes.cbegin(), sizes.cend() ), - _halo( halo ) { - + _halo( halo ) + { for( SizeType __size : sizes ) { if ( __size < 2 * halo + 1 ) { throw std::invalid_argument( @@ -43,9 +116,8 @@ namespace grb { } } - this->_system_size = __init_halo_search< SizeType, DIMS >( - this->get_sizes(), - _halo, this->_dimension_limits ); + this->_system_size = init_neigh_to_base_search( this->get_sizes(), + _halo, this->_dimension_limits ); assert( this->_dimension_limits.size() == DIMS ); } @@ -61,49 +133,427 @@ namespace grb { SelfType & operator=( SelfType && ) = delete; + /** + * Builds an iterator from the beginning of the system, i.e. from vector \a [0,0,...,0]. + * The iterator iterates on each neighbor and allows iterating on each element and on + * its neighbors. + */ Iterator begin() const { return Iterator( *this ); } + /** + * Build an iterator marking the end of the system; it should not be accessed. + */ Iterator end() const { return Iterator::make_system_end_iterator( *this ); } + /** + * Returns the size of the entire system, i.e. the number of neighbors of all elements. + */ size_t halo_system_size() const { return this->_system_size; } + /** + * Returns the size of the base system, i.e. number of elements (not considering neighbors). + */ size_t base_system_size() const { return this->BaseType::system_size(); } + /** + * Returns the halo size. + */ size_t halo() const { return this->_halo; } + /** + * Computes the first neighbor and the size of the N-dimensional range of neighbors + * around the given element's coordinates for the system \c this. 
+ * + * @param[in] element_coordinates coordinates of the element to iterate around + * @param[out] neighbors_start first neighbor around \p element_coordinates to iterate from + * @param[out] neighbors_range vector of halos around \p element_coordinates; + * if \p element_coordinates is an inner point, all values equal #halo(), they are smaller + * otherwise (on corner, edge, or face). + */ void compute_neighbors_range( - const VectorType &system_coordinates, + const VectorType &element_coordinates, VectorType &neighbors_start, - VectorType &neighbors_range) const noexcept { - __compute_neighbors_range( this->get_sizes(), + VectorType &neighbors_range + ) const noexcept { + compute_first_neigh_and_range( this->get_sizes(), this->_halo, - system_coordinates, + element_coordinates, neighbors_start, neighbors_range ); } + /** + * Maps the linear index \p neighbor_linear of a neighbor to the vector \p base_element_vector + * of the corresponding element \p neighbor_linear is neighbor of, and returns the neighbor's + * number within the sub-space of \p base_element_vector 's neighbors. + * + * @param[in] neighbor_linear linear coordinate of input neighbor + * @param[out] base_element_vector vector of coordinates that identify which element + * \p neighbor_linear is neighbor of + * @return size_t the neighbor number w.r.t. 
to the corresponding element: if \a e is the system + * element \p neighbor_linear is neighbor of and \a e has \a n neighbors, then the return value + * \a 0<=iget_sizes(), - this->_system_size, this->_dimension_limits, this->_halo, neighbor, result ); + SizeType neighbor_linear, + VectorType &base_element_vector + ) const noexcept { + return map_neigh_to_base_and_index( this->get_sizes(), this->_system_size, + this->_dimension_limits, this->_halo, neighbor_linear, base_element_vector ); } private: const SizeType _halo; std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > _dimension_limits; size_t _system_size; + + /** + * Computes the total number of neighbors along a certain dimension and configuration by accumulating + * the neighbors along the smaller dimensions. + * + * The logic uses this buffer to iterate over the configurations of + * the previous dimension. Example: to compute in 3D the neighbors of an inner row of a face + * (configuration [0,1,0], dimension 1 - y), the logic needs the neighbors of + * en edge element and of an element internal to a face of the mesh, corresponding to + * the configurations [0,1,0] and [1,1,0], respectively. Hence, the caller + * must initialize a buffer with the values [X,1,0] (\a X meaning don't care) and pass + * as \p coords_buffer the pointer to the first position (the \a X ), where this function + * will write all possible values [0, \p halo ) to access the number of neighbors + * of the configurations of the previous dimension via \p prev_neighs and accumulate them. 
+ * + * @param[in] prev_neighs neighbors in the configurations of the previous dimension + * @param[in,out] coords_buffer pointer to the first position of the configuration buffer + * for this dimension + * @param[in] halo halo size + * @param[in] local_size size (i.e., number of elements) along the current dimension, + * including the edges + * @return size_t the total number of neighbors for this configuration and this dimension + */ + static size_t accumulate_dimension_neighbours( + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > &prev_neighs, + SizeType* coords_buffer, + size_t halo, + size_t local_size + ) { + size_t neighs =0; + size_t h = 0; + for( ; h < halo && local_size > 1; h++ ) { + *coords_buffer = h; + + const size_t local_neighs = prev_neighs.at( coords_buffer ); + neighs += 2 * local_neighs; // the 2 sides + local_size -= 2; + } + *coords_buffer = h; + neighs += local_size * prev_neighs.at( coords_buffer ); // innermost elements + return neighs; + } + + /** + * Computes the number of neighbors for each configuration along dimension 0: + * corner, edge, face, inner element. + * + * Example: in a 3D system with \p halo = 1, the configurations along dimension 0 are 8: + * 1. z axis - face: + * 1. y axis - top row: corner element (8 neighbors), edge element (12 neighbors) + * 2 y axis - inner row: edge element (12 neighbors), face inner element (18 neighbors) + * 2. z axis - inner slab: + * 1. 
y axis - top row: edge element (12 neighbors), face inner element (18 neighbors) + * 2 y axis - inner row: face inner element (18 neighbors), inner element (27 neighbors) + * + * @param[in] halo halo size + * @param[out] config_neighbors the storage object for each configuration + */ + static void compute_dim0_neighbors( + size_t halo, + NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >& config_neighbors + ) { + using it_type = typename NDimVector< SizeType, SizeType, + DynamicVectorStorage< SizeType > >::DomainIterator; + it_type end = config_neighbors.domain_end(); + for( it_type it = config_neighbors.domain_begin(); it != end; ++it ) { + size_t res = 1; + for( size_t h: it->get_position() ) res *= (h + 1 + halo); + config_neighbors.at( it->get_position() ) = res; + } + } + + /** + * Initializes the search space of neighbors for the neighbor -> base element translation. + * + * This function populates an std::vector<> with the number of neighbors for each dimension + * and each configuration (corner, edge, face, inner).
+ * Along each dimension \a d, it stores an \a n -dimensional vector + * NDimVector> (n = 2 ^ d) with all + * possible numbers of neighbors along that dimension, depending on the position of the element + * (corner, edge, face, inner volume); for example, for 3 dimensions: + * - dimension 2 (z axis) moves along "slabs" of a 3D systems, where the total number of neighbors + * depends on whether the slab is a face of the mesh of an internal slab (2 possible configurations: + * face slabs or inner slabs) + * - dimension 1 (y axis) moves along "rows" within each slab, whose total number of neighbors + * depends on whether the row is at the extreme sides (top or bottom of the face) or inside; + * in turn, each type of slab has different geometry (face slabs comprise mesh corners, edges and faces, + * while inner slabs comprise edges, faces and inner elements), thus resulting in 2*2 different + * configurations of dimension-1 total neighbors + * - dimension 0 (x axis) moves along "column" elements within each row, where the first (or last) + * column has a different number of neighbors than the inner ones; here again are two configuration + * for each dimension-1 configuration, leading to a total of 8 dimension-1 configurations + * Within each dimension \a d, each configuration (as per the above explanation) can be identified + * via a vector of N - d coordinates; to limit the data storage, every dimension stores the + * total number of neighbors only at the first side and inside, since the second side is identical + * to the first one: for example, along the z axis the first and last slab (those on the two extremes) + * have the same size, and one only is stored. Therefore, with halo = 1 a vector identifying + * a configuration is composed only of 0s and 1s. For example, the vector [0,1,0] identifies: + * - rightmost 0 (z axis): first (or last) slab, i.e. face slab + * - (middle) 1 (y axis): inner row + * - leftmost 0 (x axis): first (or last) element, i.e. 
on the edge of the mesh + * In a 3D space with halo = 1, this element has 12 neighbors (it is on the edge of a face). + * + * @param[in] sizes sizes of the N-dimensional system + * @param[in] halo halo size + * @param[out] dimension_limits the std::vector<> with the neighbors information for each dimension + * and each configuration + * @return size_t the number of neighbors of the entire system + */ + static size_t init_neigh_to_base_search( + typename LinearizedNDimSystem< SizeType, + ArrayVectorStorage< DIMS, SizeType > >::ConstVectorReference + sizes, + size_t halo, + std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > >& dimension_limits + ) { + using nd_vec = NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >; + using nd_vec_iterator = typename nd_vec::DomainIterator; + + std::vector halo_sizes( DIMS, halo + 1); + dimension_limits.emplace_back(halo_sizes); + // initialize values + compute_dim0_neighbors( halo, dimension_limits[0] ); + for( size_t i = 1; i < DIMS; i++ ) { + std::vector halos( DIMS - i, halo + 1 ); + dimension_limits.emplace_back(halos); + } + + std::array< SizeType, DIMS > prev_coords_buffer; // store at most DIMS values + SizeType* const prev_coords = prev_coords_buffer.data(); + SizeType* const second = prev_coords + 1; // store previous coordinates from second position + for( size_t dimension = 1; dimension < DIMS; dimension++ ) { + const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; + nd_vec& current_neighs{dimension_limits[dimension]}; + + nd_vec_iterator end = current_neighs.domain_end(); + for( nd_vec_iterator it = current_neighs.domain_begin(); it != end; ++it ) { + typename nd_vec::ConstDomainVectorReference current_halo_coords = it->get_position(); + + std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); + size_t local_size = sizes[dimension - 1]; + const size_t neighs = accumulate_dimension_neighbours(prev_neighs, prev_coords, halo,
local_size); + current_neighs.at(current_halo_coords) = neighs; + } + } + return accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); + } + + /** + * For the given system (with sizes \p _system_sizes), the given halo size \p halo, + * the given element's coordinates \p element_coordinates, computes the coordinates + * of the first neighbor of \p element_coordinates into \p neighbors_start (within the main system) + * and the range of neighbors of \p element_coordinates, i.e. the sub-space of neighbors of + * \p element_coordinates; hence, \p neighbors_range stores at most 2 *<\em> \p halo + * + 1 per coordinate. + * + * @param[in] _system_sizes sizes of the N-dimensional system + * @param[in] halo halo size + * @param[in] element_coordinates coordinates of the considered element + * @param[out] neighbors_start stores the (absolute) coordinates of the first neighbor + * of \p element_coordinates + * @param[out] neighbors_range stores the range of neighbors around \p element_coordinates + */ + static void compute_first_neigh_and_range( + const ArrayVectorStorage< DIMS, SizeType > &_system_sizes, + const SizeType halo, + const ArrayVectorStorage< DIMS, SizeType > &element_coordinates, + ArrayVectorStorage< DIMS, SizeType > &neighbors_start, + ArrayVectorStorage< DIMS, SizeType > &neighbors_range + ) { + for( SizeType i = 0; i < DIMS/* - 1*/; i++ ) { + const SizeType start = element_coordinates[i] <= halo ? 
0 : element_coordinates[i] - halo; + const SizeType end = std::min( element_coordinates[i] + halo, _system_sizes[i] - 1 ); + neighbors_start[i] = start; + neighbors_range[i] = end - start + 1; + } + } + +#ifdef _DEBUG + template< typename IterType > static std::ostream & print_sequence( IterType begin, IterType end ) { + for( ; begin != end; ++begin ) { + std::cout << *begin << ' '; + } + return std::cout; + } +#endif + + /** + * Maps a neighbor's linear coordinate \p neighbor_linear to the element \p element_vector it is + * neighbor of and also returns the neighbor index of \p neighbor_linear within the sub-space + * of \p element_vector's neighbors. + * + * @param[in] sizes main system sizes along all dimensions + * @param[in] system_size total size of the neighbors system, i.e. the total number of neighbors + * @param[in] neighbors_per_dimension along each dimension \a d, it stores an \a n -dimensional vector + * NDimVector> (n = 2 ^ d) with all + * possible numbers of neighbors along that dimension, depending on the position of the element + * (corner, edge, face, inner volume) + * @param[in] halo halo size + * @param[in] neighbor_linear linear coordinate of the neighbor + * @param[out] element_vector coordinates vector representing the element \p neighbor_linear is + * neighbor of + * @return size_t the index of the neighbor within the element's neighbors + */ + static size_t map_neigh_to_base_and_index( + const std::array< SizeType, DIMS > &sizes, + size_t system_size, + const std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > + &neighbors_per_dimension, + SizeType halo, + SizeType neighbor_linear, + ArrayVectorStorage< DIMS, SizeType > &element_vector + ){ + if( neighbor_linear > system_size ) { + throw std::invalid_argument( "neighbor number ( " + std::to_string( neighbor_linear ) + + " ) >= system size ( " + std::to_string( system_size ) + " )"); + } + ArrayVectorStorage< DIMS, SizeType > configuration( DIMS ); +#ifdef 
_DEBUG + size_t * const halo_coords_end = configuration.data() + DIMS; +#endif + std::fill_n( configuration.begin(), DIMS, 0 ); + + for( size_t _dim = DIMS; _dim > 0; _dim--) { + + // each iteration looks for the base element along a dimension via the number of neighbors + // each element has: once previous_neighs reaches neighbor_linear, the corresponding + // base element is found; if the control reaches the end, this means it must explore + // the following dimension to find the base element: this is why dimensions are explored + // starting from the highest, because moving along a higher dimension means "skipping" + // more neighbors; then the search "zooms in"to a smaller dimension to find the base element + + // start from highest dimension + const size_t dimension = _dim - 1; + // how many elements along this dimension + const size_t dimension_size = sizes[dimension]; + // configurations of neighbors along this dimension + // (e.g., corner, edge; or edge, inner element) + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > + & neighbors = neighbors_per_dimension[dimension]; + + // coordinate to modify to identify each configuration + SizeType * const halo_coords_begin = configuration.data() + dimension; +#ifdef _DEBUG + std::cout << "DIMENSION " << dimension << std::endl + << "- setup - neighbour " << neighbor_linear << std::endl + << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; +#endif + size_t h =0; // configuration type along this dimension + size_t previous_neighs = 0; + *halo_coords_begin = h; + // account for neighbors in the first elements along the dimension, within halo distance: + // these elements have a number of neighbors that depends on the distance h + // and on the configuration + size_t halo_max_neighs = neighbors.at( halo_coords_begin ); + while( h < halo && neighbor_linear >= previous_neighs + halo_max_neighs ) { + h++; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; 
+ halo_max_neighs = neighbors.at( halo_coords_begin ); + } +#ifdef _DEBUG + std::cout << "- initial halo - neighbour " << neighbor_linear << std::endl + << "\th " << h << std::endl + << "\thalo : "; + print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + if ( h < halo ){ + // we have already counted enough neighbors: neighbor_linear is thus a neighbor + // of one of the first (< halo) elements along this dimension: go to next dimension + element_vector[dimension] = h; + neighbor_linear -= previous_neighs; +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + continue; + } + // saturation occurred: the base element is beyond the halo: go on with the search + + // inner elements have the same number of neighbors halo_max_neighs: compute + // the base element via division + const size_t distance_from_halo = ( neighbor_linear - previous_neighs ) / halo_max_neighs; +#ifdef _DEBUG + std::cout << "- before middle elements - neighbour " << neighbor_linear << std::endl + << "\tprevious_neighs " << previous_neighs << std::endl + << "\thalo_max_neighs " << halo_max_neighs << std::endl + << "\tdistance_from_halo " << distance_from_halo << std::endl + << "\tdimension_size " << dimension_size << std::endl; +#endif + if ( distance_from_halo < dimension_size - 2 * halo ) { + // the base element is one of the internal elements along this dimension: + // hence return its diatance from the halo + the halo itself (= distance from + // beginning of the space) + element_vector[dimension] = distance_from_halo + halo; + neighbor_linear -= (previous_neighs + distance_from_halo * halo_max_neighs) ; +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + continue; + } + // base element is even beyond inner elements, it might be among the elements at the end, + // which also have different numbers of neighbors (specular to initial 
elements) + previous_neighs += ( dimension_size - 2 * halo ) * halo_max_neighs; +#ifdef _DEBUG + std::cout << "- after middle elements -neighbour " << neighbor_linear << std::endl; + std::cout << "\tprevious_neighs " << previous_neighs << std::endl; + std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; +#endif + // look for base the element at the end of the dimension: specular search to beginning, + // just with h decreasing + h = halo - 1; + *halo_coords_begin = h; + halo_max_neighs = neighbors.at( halo_coords_begin ); + while( h > 0 && neighbor_linear >= previous_neighs + halo_max_neighs ) { + h--; + *halo_coords_begin = h; + previous_neighs += halo_max_neighs; + halo_max_neighs = neighbors.at( halo_coords_begin ); + } + neighbor_linear -= previous_neighs; +#ifdef _DEBUG + std::cout << "- final halo - neighbour " << neighbor_linear << std::endl; + std::cout << "\tadding h " << h << " previous_neighs " << previous_neighs << std::endl; +#endif + // ( dimension_size - 1 ) because coordinates are 0-based and neighbor + // is "inside" range [ previous_neighs, previous_neighs + halo_max_neighs ] + element_vector[dimension] = dimension_size - 1 - h; +#ifdef _DEBUG + std::cout << "end neighbour " << neighbor_linear << std::endl; +#endif + } + return neighbor_linear; + } + }; } // namespace multigrid diff --git a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp index f65ec8831..199d08926 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @file linearized_ndim_iterator.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of LinearizedNDimIterator. 
+ */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR @@ -25,6 +31,8 @@ #include #include +#include + #include "array_vector_storage.hpp" namespace grb { @@ -37,6 +45,17 @@ namespace grb { typename InternalVectorType > class LinearizedNDimSystem; + /** + * Iterator object coupled to a LinearizedNDimSystem: each object points to a vector + * in the creating LinearizedNDimSystem#dimensions()-dimensions space, to which also a + * linear position is associated; both the vector and the linear position can be retrieved + * via the \a -> method. + * + * It meets the requirements of a random access iterator. + * + * @tparam SizeType integral type to store the size of each dimension + * @tparam InternalVectorType internal vector type to store the sizes + */ template< typename SizeType, typename InternalVectorType @@ -47,6 +66,11 @@ namespace grb { using ConstVectorReference = const VectorType&; using SelfType = LinearizedNDimIterator< SizeType, InternalVectorType >; + /** + * Structure describing a couple vector/linear coordinate: the vector + * can be obtained via #get_position() while the linear coordinate via + * #get_linear_position(). + */ struct NDimPoint { private: const LinNDimSysType* system; // pointer because of copy assignment @@ -86,11 +110,30 @@ namespace grb { using reference = const value_type&; using difference_type = signed long; + /** + * Construct a new LinearizedNDimIterator object from the original LinNDimSysType + * object, storing the information about system dimensionality and sizes. The referenced + * vector is the first one in the system, i.e. with all coordinates being \a 0. + * + * If \p _system is not a valid object anymore, all iterators created from it are also + * not valid. 
+ */ LinearizedNDimIterator( const LinNDimSysType &_system ) noexcept : _p( _system ) {} - template< typename IterT > LinearizedNDimIterator( const LinNDimSysType &_system, IterT begin ) noexcept : + /** + * Construct a new LinearizedNDimIterator object from the original LinNDimSysType + * object, storing the information about system dimensionality and sizes. The referenced + * vector is initialized with the coordinates referenced via the iterator \p begin, + * which should have at least \p _system.dimensions() valid successors. + * + * If \p _system is not a valid object anymore, all iterators created from it are also + * not valid. + */ + template< typename IterT > LinearizedNDimIterator( + const LinNDimSysType &_system, IterT begin + ) noexcept : _p( _system ) { std::copy_n( begin, _system.dimensions(), this->_p.coords.begin() ); @@ -105,10 +148,14 @@ namespace grb { ~LinearizedNDimIterator() {} + /** + * Moves to the next vector in the multi-dimensional space, corresponding to + * advancing the linear coordinate by 1. + */ SelfType & operator++() noexcept { - bool rewind{ true }; + bool rewind = true; // rewind only the first N-1 coordinates - for( size_t i { 0 }; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { + for( size_t i = 0; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { SizeType& coord = this->_p.coords[ i ]; // must rewind dimension if we wrap-around SizeType plus = coord + 1; @@ -122,24 +169,35 @@ namespace grb { return *this; } + /** + * Moves \p _offset vectors ahead in the multi-dimensional space, corresponding to + * advancing the linear coordinate by \p _offset. + * + * If the destination vector is outside of the system (i.e. the corresponding + * linear coordinate is beyond the underlying LinearizedNDimSystem#system_size()), + * an exception is thrown. 
+ */ SelfType & operator+=( size_t offset ) { - size_t linear{ _p.get_linear_position() + offset }; + size_t linear = _p.get_linear_position() + offset; if( linear > _p.system->system_size() ) { throw std::invalid_argument("increment is too large"); } + if( offset == 1 ) { + return operator++(); + } _p.system->linear_to_ndim( linear, _p.coords ); return *this; } + /** + * Returns the difference between \p _other and \c this in the linear space. + * + * It throws if the result cannot be stored as a difference_type variable. + */ difference_type operator-( const SelfType &other ) const { - size_t a_pos{ _p.get_linear_position() }, - b_pos{ other._p.get_linear_position() }; - size_t lowest{ std::min( a_pos, b_pos ) }, highest{ std::max( a_pos, b_pos )}; - if( highest - lowest > static_cast< size_t >( - std::numeric_limits< difference_type >::max() ) ) { - throw std::invalid_argument( "iterators are too distant" ); - } - return ( static_cast< difference_type >( a_pos - b_pos ) ); + return grb::utils::compute_signed_distance< difference_type, SizeType >( + _p.get_linear_position(), other._p.get_linear_position() ); + } reference operator*() const { @@ -151,22 +209,26 @@ namespace grb { } bool operator!=( const SelfType &o ) const { - const size_t dims{ this->_p.system->dimensions() }; + const size_t dims = this->_p.system->dimensions(); if( dims != o._p.system->dimensions() ) { throw std::invalid_argument("system sizes do not match"); } - bool equal{ true }; - for( size_t i{0}; i < dims && equal; i++) { + bool equal = true; + for( size_t i =0; i < dims && equal; i++) { equal &= ( this->_p.coords[i] == o._p.coords[i] ); } return !equal; } - // implementation depending on logic in operator++ + /** + * Facility to build an end iterator. + * + * Its implementation depending on the logic in operator++. 
+ */ static SelfType make_system_end_iterator( const LinNDimSysType &_system ) { // fill with 0s SelfType iter( _system ); - size_t last{ iter->system->dimensions() - 1 }; + size_t last = iter->system->dimensions() - 1; // store last size in last position iter._p.coords[ last ] = iter->system->get_sizes()[ last ]; return iter; diff --git a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp index 3e4c15b14..7b3c94341 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @file linearized_ndim_system.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of \p LinearizedNDimSystem. + */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER @@ -29,29 +35,24 @@ #include "ndim_system.hpp" #include "linearized_ndim_iterator.hpp" -// #include "array_vector_storage.hpp" - -/** - * @file linearized_ndim_system.cpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * Definition of \p LinearizedNDimSystem. - * - * @date 2022-10-24 - */ namespace grb { namespace utils { namespace multigrid { /** - * Extends a \p NDimSystem by linearizing it, i.e. it provides facilities to map a vector in - * NDimSystem#dimensions() dimensions to a linear value ranging from \a 0 to #system_size() + * Extends an NDimSystem by linearizing it, i.e. it provides facilities to map a vector in + * NDimSystem#dimensions() dimensions to a linear value ranging from \a 0 to #system_size() (excluded) * and vice versa. Such a linearized representation allows user logic to iterate over the system: - * iterators are indeed available via #begin()/#end(). + * iterators are indeed available via #begin()/#end(). 
Consecutive system elements along dimension 0 + * are mapped to consecutive linear values, while elements consecutive along dimension 1 + * are mapped at offset #get_offsets()[1] = #get_sizes()[0], elements along dimension 2 + * are mapped at offset #get_offsets()[2] = #get_sizes()[0] * #get_sizes()[1], and so on. * * Further facilities are methods to map users' vectors from linear to NDimSystem#dimensions()-dimensional * or vice versa and also to "retaget" the system, i.e. to represent a system of same dimensionality - * but different sizes. + * but different sizes; this last feature is a mere performance optimization aimed at + * reusing existing objects instead of deleting them and allocating new memory. * * @tparam SizeType integral type to store the size of each dimension * @tparam InternalStorageType internal vector type to store the sizes @@ -66,32 +67,44 @@ namespace grb { using BaseType = NDimSystem< SizeType, InternalVectorType >; using SelfType = LinearizedNDimSystem< SizeType, InternalVectorType >; using VectorType = typename BaseType::VectorType; - using VectorReference = typename BaseType::VectorReference; using ConstVectorReference = typename BaseType::ConstVectorReference; using VectorStorageType = typename VectorType::VectorStorageType; using ConstVectorStorageType = typename VectorType::ConstVectorStorageType; using Iterator = LinearizedNDimIterator< SizeType, InternalVectorType >; + /** + * Construct a new LinearizedNDimSystem object from an iterable range, + * where each iterator's position stores the size along each dimension; example: + * *begin is the size along dimension 0, *(++begin) is the size along dimension 1 ... 
+ */ template< typename IterT > LinearizedNDimSystem( IterT begin, IterT end) noexcept : BaseType( begin, end ), - offsets( std::distance( begin, end ) ) + _offsets( std::distance( begin, end ) ) { - this->_system_size = compute_offsets( begin, end, this->offsets.begin() ) ; + this->_system_size = compute_range_product( begin, end, this->_offsets.begin() ) ; } + /** + * Construct a new LinearizedNDimSystem object with dimensions \p _sizes.size() + * and sizes stored in \p _sizes. + */ LinearizedNDimSystem( const std::vector< size_t > &_sizes ) noexcept : LinearizedNDimSystem( _sizes.cbegin(), _sizes.cend() ) {} - LinearizedNDimSystem( size_t _dimensions, size_t max_value ) noexcept : - BaseType( _dimensions, max_value ), - offsets( _dimensions ), + /** + * Construct a new LinearizedNDimSystem object with \p _dimensions dimensions + * and sizes all equal to \p _size. + */ + LinearizedNDimSystem( size_t _dimensions, size_t _size ) noexcept : + BaseType( _dimensions, _size ), + _offsets( _dimensions ), _system_size( _dimensions ) { - SizeType v{1}; - for( size_t i{0}; i < _dimensions; i++ ) { - this->offsets[i] = v; - v *= max_value; + SizeType v = 1; + for( size_t i =0; i < _dimensions; i++ ) { + this->_offsets[i] = v; + v *= _size; } this->_system_size = v; } @@ -101,7 +114,7 @@ namespace grb { LinearizedNDimSystem( const SelfType &original ) = default; LinearizedNDimSystem( SelfType &&original ) noexcept: - BaseType( std::move(original) ), offsets( std::move( original.offsets ) ), + BaseType( std::move(original) ), _offsets( std::move( original._offsets ) ), _system_size( original._system_size ) { original._system_size = 0; } @@ -112,34 +125,59 @@ namespace grb { SelfType& operator=( SelfType &&original ) = delete; + /** + * Returns the size of the system, i.e. its number of elements; + * this corresponds to the product of the sizes along all dimensions. 
+ */ inline size_t system_size() const { return this->_system_size; } + /** + * Get the offsets of the system, i.e. by how many linear elements moving along + * a dimension corresponds to. + */ inline ConstVectorReference get_offsets() const { - return this->offsets; + return this->_offsets; } + /** + * Computes the #dimensions()-dimensions vector the linear value in input corresponds to. + * + * @param[in] linear linear index + * @param[out] output output vector \p linear corresponds to + */ void linear_to_ndim( size_t linear, VectorReference output ) const { if( linear > this->_system_size ) { throw std::range_error( "linear value beyond system" ); } - for( size_t _i{ this->offsets.dimensions() }; _i > 0; _i-- ) { - const size_t dim{ _i - 1 }; - const size_t coord{ linear / this->offsets[dim] }; + for( size_t _i = this->_offsets.dimensions(); _i > 0; _i-- ) { + const size_t dim = _i - 1; + const size_t coord = linear / this->_offsets[dim]; output[dim] = coord; - linear -= ( coord * this->offsets[dim] ); + linear -= ( coord * this->_offsets[dim] ); } assert( linear == 0 ); } + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to \p InternalVectorType and checks whether each value in the input + * vector \p ndim_vector is within the system sizes (otherwise it throws). + */ size_t ndim_to_linear_check( ConstVectorReference ndim_vector) const { return this->ndim_to_linear_check( ndim_vector.storage() ); } + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to the underlying storage of \p InternalVectorType and checks + * whether each value in the input vector \p ndim_vector is within the system sizes + * (otherwise it throws). 
+ */ size_t ndim_to_linear_check( ConstVectorStorageType ndim_vector ) const { - size_t linear { 0 }; - for( size_t i { 0 }; i < this->dimensions(); i++ ) { + size_t linear = 0; + for( size_t i = 0; i < this->dimensions(); i++ ) { if( ndim_vector[i] >= this->get_sizes()[i] ) { throw std::invalid_argument( "input vector beyond system sizes" ); } @@ -147,19 +185,34 @@ namespace grb { return ndim_to_linear( ndim_vector ); } + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to \p InternalVectorType but does not check whether each value in the input + * vector \p ndim_vector is within the system sizes. + */ size_t ndim_to_linear( ConstVectorReference ndim_vector) const { return this->ndim_to_linear( ndim_vector.storage() ); } + /** + * Computes the linear value the input vector corresponds to; this method takes in input + * a const reference to the underlying storage of \p InternalVectorType but does not check + * whether each value in the input vector \p ndim_vector is within the system sizes. + */ size_t ndim_to_linear( ConstVectorStorageType ndim_vector ) const { - size_t linear { 0 }; - for( size_t i { 0 }; i < this->dimensions(); i++ ) { - linear += this->offsets[i] * ndim_vector[i]; + size_t linear = 0; + for( size_t i = 0; i < this->dimensions(); i++ ) { + linear += this->_offsets[i] * ndim_vector[i]; } return linear; } // must be same dimensionality + /** + * Retargets the current object to describe a system with the same number of dimensions + * and sizes \p _new_sizes. If the number of dimensions of \p _new_sizes does not match + * #dimensions(), an exception is thrown. 
+ */ void retarget( ConstVectorReference _new_sizes ) { if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { throw std::invalid_argument("new system must have same dimensions as previous: new " @@ -167,26 +220,42 @@ namespace grb { + std::to_string( this->_sizes.dimensions() ) ); } this->_sizes = _new_sizes; // copy - this->_system_size = compute_offsets( _new_sizes.begin(), _new_sizes.end(), this->offsets.begin() ) ; + this->_system_size = compute_range_product( _new_sizes.begin(), _new_sizes.end(), this->_offsets.begin() ) ; } + /** + * Returns a beginning iterator to the #dimensions()-dimensional system \c this describes. + * The provided iterator references a system point, described both via its #dimensions()-dimensional + * coordinates and via a linear value from \a 0 to #system_size() (excluded). + */ Iterator begin() const { return Iterator( *this ); } + /** + * Return an iterator to the end of the system; this iterator should not be + * referenced nor incremented. + */ Iterator end() const { return Iterator::make_system_end_iterator( *this ); } private: - VectorType offsets; + VectorType _offsets; size_t _system_size; + /** + * Incrementally computes the product of the input iterator's range, storing each value + * into the position pointed to the output iterator; the accumulation starts from 1 + * (also the first output values), and the last accumulated value is returned directly + * (and not stored). This assumes that the output container can store at least as many values + * as in the input range. 
+ */ template< typename IterIn, typename IterOut - > static size_t compute_offsets( IterIn in_begin, IterIn in_end, IterOut out_begin ) { - size_t prod{1}; + > static size_t compute_range_product( IterIn in_begin, IterIn in_end, IterOut out_begin ) { + size_t prod = 1; for( ; in_begin != in_end; ++in_begin, ++out_begin ) { *out_begin = prod; prod *= *in_begin; diff --git a/include/graphblas/utils/multigrid/ndim_system.hpp b/include/graphblas/utils/multigrid/ndim_system.hpp index 9d387ce32..f184a7042 100644 --- a/include/graphblas/utils/multigrid/ndim_system.hpp +++ b/include/graphblas/utils/multigrid/ndim_system.hpp @@ -15,26 +15,21 @@ * limitations under the License. */ +/** + * @file ndim_system.cpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of NDimSystem. + */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM #include #include #include -#include #include #include -#include "array_vector_storage.hpp" - -/** - * @file ndim_system.cpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * Definition of \p NDimSystem. - * - * @date 2022-10-24 - */ - namespace grb { namespace utils { namespace multigrid { @@ -61,7 +56,8 @@ namespace grb { using SelfType = NDimSystem< SizeType, InternalVectorType >; /** - * Construct a new NDimSystem object from an iterable range. + * Construct a new NDimSystem object from an iterable range, where each referenced value + * is a size of the system. * * The dimension is computed as \a std::distance(begin,end), i.e. * \p IterT should be a random-access iterator for performance. diff --git a/include/graphblas/utils/multigrid/ndim_vector.hpp b/include/graphblas/utils/multigrid/ndim_vector.hpp index 26ee084e6..7992f23f6 100644 --- a/include/graphblas/utils/multigrid/ndim_vector.hpp +++ b/include/graphblas/utils/multigrid/ndim_vector.hpp @@ -1,4 +1,26 @@ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file ndim_vector.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Definition of NDimVector. + */ + #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR @@ -20,7 +42,7 @@ namespace grb { * The user constructs an object by passing the sizes (as an N-dimensional vector) * of the iteration space and accesses the stored data via an N-dimensional vector of coordinates. * - * Example: if the user constructs an \p NDimVector with 3D sizes \a [2,3,4], she can access data + * Example: if the user constructs an NDimVector with 3D sizes \a [2,3,4], she can access data * via a 3D coordinates vector of ranges \a [0-1]x[0-2]x[0-3] (here \a x denoting the cartesian product) * by using the #at() method. * @@ -50,12 +72,22 @@ namespace grb { NDimVector() = delete; + /** + * Construct a new NDimVector object with sizes read from the iteration range + * and number of dimensions equal to the range distance; the data values are + * \b not initialized. + */ template< typename IterT > NDimVector( IterT begin, IterT end) : _linearizer( begin, end ) { this->data = new DataType[ _linearizer.system_size() ]; } + /** + * Construct a new NDimVector object with sizes read from the \p _sizes + * and number of dimensions equal to \p _sizes.size(); the data values are + * \b not initialized. 
+ */ NDimVector( const std::vector< size_t > &_sizes ) : NDimVector( _sizes.cbegin(), _sizes.cend() ) {} @@ -81,34 +113,64 @@ namespace grb { this->clean_mem(); } + /** + * Number of dimensions of the underlying geometrical space. + */ size_t dimensions() const { return this->_linearizer.dimensions(); } + /** + * Size of the underlying geometrical space, i.e. number of stored data elements. + */ size_t data_size() const { return this->_linearizer.system_size(); } + /** + * Access the data element at N-dimension coordinate given by the iterable + * \p coordinates. + */ inline DataType& at( ConstDomainVectorReference coordinates ) { return this->data[ this->get_coordinate( coordinates.storage() ) ]; } + /** + * Const-access the data element at N-dimension coordinate given by the iterable + * \p coordinates. + */ inline const DataType& at( ConstDomainVectorReference coordinates ) const { return this->data[ this->get_coordinate( coordinates.storage() ) ]; } + /** + * Access the data element at N-dimension coordinate given by the vector + * storage object \p coordinates. + */ inline DataType& at( ConstDomainVectorStorageType coordinates ) { return this->data[ this->get_coordinate( coordinates ) ]; } + /** + * Const-access the data element at N-dimension coordinate given by the vector + * storage object \p coordinates. + */ inline const DataType& at( ConstDomainVectorStorageType coordinates ) const { return this->data[ this->get_coordinate( coordinates ) ]; } + /** + * Returns an iterator to the beginning of the N-dimensional underlying space, + * i.e. a vector \a [0,0,0,...,0]. + */ DomainIterator domain_begin() const { return this->_linearizer.begin(); } + /** + * Returns an iterator to the end of the N-dimensional underlying space. + * This iterator should not be dereferenced nor incremented. 
+ */ DomainIterator domain_end() const { return this->_linearizer.end(); } diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 3e318b0cb..c95cfba85 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -1,6 +1,6 @@ /* - * Copyright 2021 Huawei Technologies Co., Ltd. + * Copyright 2022 Huawei Technologies Co., Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,12 +18,10 @@ /** * @file hpcg_test.cpp * @author Alberto Scolari (alberto.scolari@huawei.com) - * @brief Test for HPCG simulations on N-dimensional physical problems. + * Test for HPCG simulations on N-dimensional physical problems. * * This test strictly follows the parameter and the formulation of the reference HPCG * benchmark impementation in https://github.com/hpcg-benchmark/hpcg. - * - * @date 2021-04-30 */ #include @@ -41,23 +39,20 @@ // to easily trace the steps of the solver, just define this symbol // #define HPCG_PRINT_STEPS -// here we define a custom macro and do not use NDEBUG since the latter is not defined for smoke tests +// here we define a custom macro, which enables tracing only for HPCG code #ifdef HPCG_PRINT_STEPS #include // HPCG_PRINT_STEPS requires defining the following symbols -/** - * @brief simply prints \p args on a dedicated line. - */ +// prints args on a dedicated line #define DBG_println( args ) std::cout << args << std::endl; // forward declaration for the tracing facility template< typename T > void print_norm( const grb::Vector< T > &r, const char * head ); -/** - * @brief prints \p head and the norm of \p r. 
- */ +// prints head and the norm of r #define DBG_print_norm( vec, head ) print_norm( vec, head ) #endif +//============================================ #include #include @@ -73,8 +68,8 @@ template< typename T > void print_norm( const grb::Vector< T > &r, const char * // default simulation parameters, set as in reference HPCG // users can input different ones via the cmd line constexpr size_t PHYS_SYSTEM_SIZE_DEF = 16UL; -constexpr size_t PHYS_SYSTEM_SIZE_MIN = 2UL; -constexpr size_t MAX_COARSENING_LEVELS = 3U; +constexpr size_t PHYS_SYSTEM_SIZE_MIN = 4UL; +constexpr size_t MAX_COARSENING_LEVELS = 3UL; constexpr size_t MAX_ITERATIONS_DEF = 56UL; constexpr size_t SMOOTHER_STEPS_DEF = 1; @@ -94,18 +89,37 @@ static const char * const TEXT_HIGHLIGHT = "===> "; #define thcerr ( std::cerr << TEXT_HIGHLIGHT ) #define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } -/** - * Container for system parameters to create the HPCG problem. - */ -struct system_input { - size_t nx, ny, nz; - size_t max_coarsening_levels; -}; +// default types +using IOType = double; +using NonzeroType = double; +using InputType = double; +using ResidualType = double; +using StdRing = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, + grb::identities::zero, grb::identities::one >; +using StdMinus = operators::subtract< NonzeroType >; +using coord_t = size_t; + +// assembled types for simulation runners and input/output structures +using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, + StdRing, StdMinus >; +using mg_data_t = MultiGridData< IOType, NonzeroType >; +using coarsening_data_t = CoarseningData< IOType, NonzeroType >; +using smoothing_data_t = SmootherData< IOType >; +using hpcg_data_t = MultiGridCGData< IOType, NonzeroType, InputType >; + +static const IOType io_zero = StdRing(). template getZero< IOType >(); +static const NonzeroType nz_zero = StdRing(). 
template getZero< NonzeroType >(); +static const InputType input_zero = StdRing(). template getZero< InputType >(); +static const ResidualType residual_zero = StdRing(). template getZero< ResidualType >(); /** * Container for the parameters for the HPCG simulation. */ -struct simulation_input : public system_input { +struct simulation_input { + // physical parameters for the multi-grid + size_t nx, ny, nz; + size_t max_coarsening_levels; + // solver options size_t inner_test_repetitions; size_t max_iterations; size_t smoother_steps; @@ -114,15 +128,6 @@ struct simulation_input : public system_input { bool print_iter_stats; }; -using IOType = double; -using NonzeroType = double; -using InputType = double; -using ResidualType = double; -using StdRing = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, - grb::identities::zero, grb::identities::one >; -using StdMinus = operators::subtract< NonzeroType >; -using coord_t = size_t; - /** * Container for test outputs. 
*/ @@ -131,17 +136,10 @@ struct output { size_t inner_test_repetitions = 0; grb::utils::TimerResults times; std::unique_ptr< PinnedVector< IOType > > pinnedVector; - NonzeroType square_norm_diff = 0.0; - cg_out_data< NonzeroType > cg_out = { 0, 0.0 }; + NonzeroType square_norm_diff = nz_zero; + CGOutInfo< NonzeroType > cg_out = { 0, nz_zero }; }; -using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, - StdRing, StdMinus >; -using mg_data_t = multigrid_data< IOType, NonzeroType >; -using coarsening_data_t = coarsening_data< IOType, NonzeroType >; -using smoothing_data_t = smoother_data< IOType >; -using hpcg_data_t = mg_cg_data< IOType, NonzeroType, InputType >; - #ifdef HPCG_PRINT_SYSTEM static void print_system( const std::vector< std::unique_ptr< mg_data_t > > &system_levels, @@ -156,18 +154,21 @@ static void print_system( } #endif +//========== ROUTINES TO TRACE SOLVER STEPS ========= #ifdef HPCG_PRINT_STEPS template< typename T, class Ring > void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { - T norm = 0; + T norm = ring. template getZero< T >(); RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; (void)ret; assert( ret == SUCCESS ); if( spmd<>::pid() != 0 ) { return; } + // printf makes more likely to get single lineas in output with multiple processes + // additionally, it doesn't approximate double values if( head != nullptr ) { printf(">>> %s: %lf\n", head, norm ); } else { @@ -179,7 +180,27 @@ template< typename T > void print_norm( const grb::Vector< T > & r, const char * return print_norm( r, head, StdRing() ); } #endif +//============================================ + +/** + * Allocates the data structure input to the various simulation steps (CG, multi-grid, coarsening, smoothing) + * for each level of the multi-grid. The input is the vector of system sizes \p mg_sizes, with sizes in + * monotonically \b decreasing order (finest system first). 
+ * + * This routine is algorithm-agnositc, as long as the constructors of the data types meet the requirements + * explained in \ref multigrid_allocate_data(). + */ +template< typename T > T static next_pow_2( T n ) { + static_assert( std::is_integral< T >::value, "Integral required." ); + --n; + n |= ( n >> 1 ); + for( unsigned i = 1; i <= sizeof( T ) * 4; i *= 2 ) { + const unsigned shift = static_cast< T >( 1U ) << i; + n |= ( n >> shift ); + } + return n + 1; +} /** * Allocates the data structure input to the various simulation steps (CG, multi-grid, coarsening, smoothing) @@ -194,13 +215,13 @@ static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t > > &system_levels, std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &holder + std::unique_ptr< hpcg_data_t > &cg_system_data ) { - const size_t pid { spmd<>::pid() }; + const size_t pid = spmd<>::pid() ; grb::utils::Timer timer; - hpcg_data_t *data{ new hpcg_data_t( mg_sizes[ 0 ] ) }; - holder = std::unique_ptr< hpcg_data_t >( data ); + hpcg_data_t *data = new hpcg_data_t( mg_sizes[ 0 ] ); + cg_system_data = std::unique_ptr< hpcg_data_t >( data ); MASTER_PRINT( pid, "allocating data for the MultiGrid simulation..."); timer.reset(); multigrid_allocate_data( mg_sizes, system_levels, coarsener_levels, smoother_levels ); @@ -210,34 +231,35 @@ static void allocate_system_structures( // zero all vectors MASTER_PRINT( pid, "zeroing all vectors..."); timer.reset(); - grb::RC rc = data->zero_temp_vectors(); + grb::RC rc = data->init_vectors( io_zero ); ASSERT_RC_SUCCESS( rc ); std::for_each( system_levels.begin(), system_levels.end(), - []( std::unique_ptr< mg_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + []( std::unique_ptr< mg_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); std::for_each( coarsener_levels.begin(), 
coarsener_levels.end(), - []( std::unique_ptr< coarsening_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + []( std::unique_ptr< coarsening_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); std::for_each( smoother_levels.begin(), smoother_levels.end(), - []( std::unique_ptr< smoothing_data_t > &s) { ASSERT_RC_SUCCESS( s->zero_temp_vectors() ); } ); + []( std::unique_ptr< smoothing_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); time = timer.time(); MASTER_PRINT( pid, " time (ms) " << time << std::endl ); } /** * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. + * It allocates the data structures and populates them according to the algorithms chosen for HPCG. */ static void build_3d_system( - const system_input & in, + const simulation_input & in, std::vector< std::unique_ptr< mg_data_t > > &system_levels, std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &holder + std::unique_ptr< hpcg_data_t > &cg_system_data ) { constexpr size_t DIMS = 3; using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; - const size_t pid { spmd<>::pid() }; + const size_t pid = spmd<>::pid(); grb::utils::Timer timer; - hpcg_system_params< DIMS, NonzeroType > params { + HPCGSystemParams< DIMS, NonzeroType > params = { { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; @@ -246,22 +268,25 @@ static void build_3d_system( MASTER_PRINT( pid, "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..." 
); timer.reset(); + // construct the builder_t generator for each grid level, which depends on the system physics hpcg_build_multigrid_generators( params, mg_generators ); double time = timer.time(); MASTER_PRINT( pid, " time (ms) " << time << std::endl ); MASTER_PRINT( pid, "built HPCG generators for " << mg_generators.size() << " levels" << std::endl ); - + // extract the size for each level std::vector< size_t > mg_sizes; - // exclude main system std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), [] ( const builder_t &b ) { return b.system_size(); } ); - allocate_system_structures( mg_sizes, system_levels, coarsener_levels, smoother_levels, holder ); + // given the sizes, allocate the data structures for all the inputs of the algorithms + allocate_system_structures( mg_sizes, system_levels, coarsener_levels, smoother_levels, cg_system_data ); assert( mg_generators.size() == system_levels.size() ); assert( mg_generators.size() == smoother_levels.size() ); - assert( mg_generators.size() - 1 == coarsener_levels.size() ); + assert( mg_generators.size() - 1 == coarsener_levels.size() ); // coarsener acts between two levels + // for each grid level, populate the data structures according to the specific algorithm + // and track the time for diagnostics purposes for( size_t i = 0; i < mg_generators.size(); i++) { MASTER_PRINT( pid, "SYSTEM LEVEL " << i << std::endl ); MASTER_PRINT( pid, " populating system matrix: " ); @@ -290,31 +315,31 @@ static void build_3d_system( } /** - * @brief Main test, building an HPCG problem and running the simulation closely following the + * Main test, building an HPCG problem and running the simulation closely following the * parameters in the reference HPCG test. */ void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID - const size_t pid { spmd<>::pid() }; - MASTER_PRINT( pid, "beginning input generation..." 
<< std::endl ); - + const size_t pid = spmd<>::pid(); grb::utils::Timer timer; + MASTER_PRINT( pid, "beginning input generation..." << std::endl ); // wrap hpcg_data inside a unique_ptr to forget about cleaning chores std::unique_ptr< hpcg_data_t > hpcg_state; + // define the main HPCG runner and initialize the options of its components hpcg_runner_t hpcg_runner( build_hpcg_runner< IOType, NonzeroType, InputType, ResidualType, StdRing, StdMinus >( in.smoother_steps ) ); auto &mg_runner = hpcg_runner.mg_runner; auto &coarsener = mg_runner.coarsener_runner; auto &smoother = mg_runner.smoother_runner; - hpcg_runner.cg_opts.max_iterations = in.max_iterations; - hpcg_runner.cg_opts.tolerance = 0.0; + hpcg_runner.cg_opts.tolerance = residual_zero; hpcg_runner.cg_opts.with_preconditioning = ! in.no_preconditioning; timer.reset(); + // build the entire multi-grid system build_3d_system( in, mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state ); - double input_duration { timer.time() }; + double input_duration = timer.time(); MASTER_PRINT( pid, "input generation time (ms): " << input_duration << std::endl ); #ifdef HPCG_PRINT_SYSTEM @@ -323,16 +348,16 @@ void grbProgram( const simulation_input & in, struct output & out ) { } #endif - Matrix< NonzeroType > & A { mg_runner.system_levels[ 0 ]->A }; - Vector< NonzeroType > & x { hpcg_state->x }; - Vector< NonzeroType > & b { hpcg_state->b }; + Matrix< NonzeroType > &A = mg_runner.system_levels[ 0 ]->A; + Vector< IOType > &x = hpcg_state->x; + Vector< NonzeroType > &b = hpcg_state->b; - RC rc { SUCCESS }; + RC rc = SUCCESS; // set vectors as from standard HPCG benchmark set( x, 1.0 ); - set( b, 0.0 ); + set( b, nz_zero ); rc = grb::mxv( b, A, x, StdRing() ); - set( x, 0.0 ); + set( x, io_zero ); #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { @@ -343,49 +368,39 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.preamble = timer.time(); - cg_out_data< NonzeroType > &cg_out = 
out.cg_out; mg_data_t &grid_base = *mg_runner.system_levels[ 0 ]; // do a cold run to warm the system up MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning cold run..." << std::endl ); hpcg_runner.cg_opts.max_iterations = 1; timer.reset(); - rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); - double iter_duration { timer.time() }; + rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); + double iter_duration = timer.time(); ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, " time (ms): " << iter_duration << std::endl ); + // restore CG options to user-given values hpcg_runner.cg_opts.max_iterations = in.max_iterations; hpcg_runner.cg_opts.print_iter_stats = in.print_iter_stats; - // do benchmark - const size_t inner_test_repetitions = in.evaluation_run ? 1 : in.inner_test_repetitions; - if( in.evaluation_run ) { - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning evaluation run..." << std::endl ); - } else { - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning test run..." << std::endl ); - } + MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning solver..." 
<< std::endl ); out.inner_test_repetitions = 0; out.times.useful = 0.0; - for( size_t i = 0; i < inner_test_repetitions; ++i ) { - rc = set( x, 0.0 ); + // do benchmark + for( size_t i = 0; i < in.inner_test_repetitions; ++i ) { + rc = set( x, io_zero ); ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl ); timer.reset(); - rc = hpcg_runner( grid_base, *hpcg_state, cg_out ); - out.times.useful += timer.time(); + rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); + iter_duration = timer.time(); + out.times.useful += iter_duration; ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, "repetition,duration (ms): " << i << "," << iter_duration << std::endl ); out.inner_test_repetitions++; } if( in.evaluation_run ) { - rc = collectives<>::reduce( iter_duration, 0, operators::max< double >() ); - ASSERT_RC_SUCCESS( rc ); - out.inner_test_repetitions = static_cast< size_t >( 1000.0 / out.times.useful ) + 1; - MASTER_PRINT( pid, "Evaluation run" << std::endl - << " computed residual: " << cg_out.norm_residual << std::endl - << " iterations: " << cg_out.iterations << std::endl - << " time taken (ms): " << out.times.useful << std::endl - << " deduced inner repetitions for 1s duration: " << out.inner_test_repetitions << std::endl ); + // get maximum execution time among processes + rc = collectives<>::reduce( out.times.useful, 0, operators::max< double >() ); return; } out.times.useful /= static_cast< double >( in.inner_test_repetitions ); @@ -400,7 +415,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { grb::set( b, 1.0 ); grb::eWiseMul( b, -1.0, x, StdRing() ); - out.square_norm_diff = 0.0; + out.square_norm_diff = nz_zero; grb::dot( out.square_norm_diff, b, b, StdRing() ); // output @@ -410,7 +425,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { } /** - * @brief Parser the command-line arguments to extract the simulation information and checks they are valid. 
+ * Parser the command-line arguments to extract the simulation information and checks they are valid. */ static void parse_arguments( simulation_input &, size_t &, double &, int, char ** ); @@ -437,44 +452,47 @@ int main( int argc, char ** argv ) { struct output out; // set standard exit code - grb::RC rc { SUCCESS }; + grb::RC rc = SUCCESS; // launch estimator (if requested) if( sim_in.evaluation_run ) { grb::Launcher< AUTOMATIC > launcher; + // run just one inner iteration for evaluation purposes + sim_in.inner_test_repetitions = 1; + thcout << "beginning evaluation run..." << std::endl; rc = launcher.exec( &grbProgram, sim_in, out, true ); - if( rc == SUCCESS ) { - sim_in.inner_test_repetitions = out.inner_test_repetitions; - } else { - thcout << "launcher.exec returns with non-SUCCESS error code " << grb::toString( rc ) << std::endl; - std::exit( -1 ); - } + ASSERT_RC_SUCCESS( rc ); + ASSERT_EQ( out.inner_test_repetitions, 1 ); + // compute number of inner repetitions to achieve at least 1s duration + sim_in.inner_test_repetitions = static_cast< size_t >( 1000.0 / out.times.useful ) + 1; + thcout << "Evaluation run" << std::endl + << " computed residual: " << out.cg_out.norm_residual << std::endl + << " iterations: " << out.cg_out.iterations << std::endl + << " time taken (ms): " << out.times.useful << std::endl + << " deduced inner repetitions for 1s duration: " << sim_in.inner_test_repetitions << std::endl; } // launch full benchmark grb::Benchmarker< AUTOMATIC > benchmarker; + thcout << "beginning test run..." << std::endl; rc = benchmarker.exec( &grbProgram, sim_in, out, 1, test_outer_iterations, true ); ASSERT_RC_SUCCESS( rc ); - thcout << "Benchmark completed successfully and took " << out.cg_out.iterations - << " iterations to converge with residual " << out.cg_out.norm_residual << std::endl; - - if( ! 
out.pinnedVector ) { - thcerr << "no output vector to inspect" << std::endl; - } else { - const PinnedVector< double > &solution { *out.pinnedVector }; - thcout << "Size of x is " << solution.size() << std::endl; - if( solution.size() > 0 ) { - print_vector( solution, 30, "SOLUTION" ); - } else { - thcerr << "ERROR: solution contains no values" << std::endl; - } - } - ASSERT_RC_SUCCESS( out.error_code ); - - double diff_norm { sqrt( out.square_norm_diff ) }; + thcout << "completed successfully!" << std::endl + << " final residual: " << out.cg_out.norm_residual << std::endl + << " solver iterations: " << out.cg_out.iterations << std::endl + << " total time (ms): " << out.times.useful << std::endl; + + // check result vector, stored inside a pinned vector + ASSERT_TRUE( out.pinnedVector ); + const PinnedVector< double > &solution = *out.pinnedVector; + thcout << "Size of x is " << solution.size() << std::endl; + ASSERT_GT( solution.size(), 0 ); + print_vector( solution, 30, "SOLUTION" ); + + // check norm of solution w.r.t. expected solution (i.e. 
vector of all 1) + double diff_norm = sqrt( out.square_norm_diff ); thcout << "Norm of difference vector | - |: " << diff_norm << std::endl; - ASSERT_LT( diff_norm, max_diff_norm ); thcout << "Test OK" << std::endl; @@ -496,7 +514,7 @@ static void parse_arguments( .add_optional_argument( "--max-coarse-levels", sim_in.max_coarsening_levels, MAX_COARSENING_LEVELS, "maximum level for coarsening; 0 means no coarsening; note: actual level may be limited" " by the minimum system dimension" ) - .add_optional_argument( "--test-rep", sim_in.inner_test_repetitions, grb::config::BENCHMARKING::inner(), + .add_optional_argument( "--inner-iterations", sim_in.inner_test_repetitions, 1, "consecutive test repetitions before benchmarking" ) .add_optional_argument( "--outer-iterations", outer_iterations, 1, "test repetitions with complete initialization" ) @@ -522,7 +540,7 @@ static void parse_arguments( std::exit( -1 ); } if( sim_in.inner_test_repetitions == 0 ) { - std::cerr << "ERROR no test runs selected: set \"--test-rep >0\"" << std::endl; + std::cerr << "ERROR no test runs selected: set \"--inner-iterations\" > 0" << std::endl; std::exit( -1 ); } if( sim_in.max_iterations == 0 ) { diff --git a/tests/utils/matrix_generators.hpp b/tests/utils/matrix_generators.hpp index be45890c6..65fe789be 100644 --- a/tests/utils/matrix_generators.hpp +++ b/tests/utils/matrix_generators.hpp @@ -35,6 +35,7 @@ #include #include +#include namespace grb { @@ -114,28 +115,6 @@ namespace grb { namespace internal { - /** - * Computes the difference between \a a and \a b and returns it as the given - * type \a DiffT. - * - * Raises an exception if \a DiffT cannot store the difference. 
- */ - template< - typename SizeT, - typename DiffT - > - DiffT compute_distance( - const SizeT a, - const SizeT b - ) { - const SizeT diff = std::max( a, b ) - std::min( a, b ); - if( diff > static_cast< SizeT >( std::numeric_limits< DiffT >::max() ) ) { - throw std::range_error( "cannot represent difference" ); - } - DiffT result = static_cast< DiffT >( diff ); - return a >= b ? result : -result ; - } - /** * Stores the coordinate for a generator of diagonal matrices. */ @@ -240,9 +219,8 @@ namespace grb { typename SelfType::difference_type operator-( const SelfType &other ) const { - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this->_v.coord, other._v.coord ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this->_v.coord, other._v.coord ); } typename SelfType::pointer operator->() { return &_v; } @@ -461,9 +439,8 @@ namespace grb { const size_t this_position = coords_to_linear( _v.size, _v.row, _v.col ); const size_t other_position = coords_to_linear( other._v.size, other._v.row, other._v.col ); - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this_position, other_position ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this_position, other_position ); } typename SelfType::pointer operator->() { return &_v; } @@ -584,9 +561,8 @@ namespace grb { typename SelfType::difference_type operator-( const SelfType &other ) const { - return internal::compute_distance< - size_t, typename SelfType::difference_type - >( this->_v.offset, other._v.offset ); + return compute_signed_distance< typename SelfType::difference_type, + size_t >( this->_v.offset, other._v.offset ); } typename SelfType::pointer operator->() { return &_v; } From 11931e18d506bff853aa44d178b08386afadab64 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Tue, 29 Nov 2022 15:49:03 +0100 Subject: [PATCH 13/28] removing limit to smallest MG system --- 
include/graphblas/algorithms/hpcg/system_builder.hpp | 2 +- .../utils/multigrid/linearized_halo_ndim_system.hpp | 2 +- tests/smoke/hpcg.cpp | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp b/include/graphblas/algorithms/hpcg/system_builder.hpp index 48a2e640d..e19ba208d 100644 --- a/include/graphblas/algorithms/hpcg/system_builder.hpp +++ b/include/graphblas/algorithms/hpcg/system_builder.hpp @@ -100,7 +100,7 @@ namespace grb { throw std::invalid_argument( "halo should be higher than 0" ); } for( const auto i : sizes ) { - if( i < 2 * halo + 1 ) { + if( i < halo + 1 ) { throw std::invalid_argument( "Iteration halo goes beyond system sizes" ); } } diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp index d448fd426..400fdd3ab 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp @@ -108,7 +108,7 @@ namespace grb { _halo( halo ) { for( SizeType __size : sizes ) { - if ( __size < 2 * halo + 1 ) { + if ( __size < halo + 1 ) { throw std::invalid_argument( std::string( "the halo (" + std::to_string(halo) + std::string( ") goes beyond a system size (" ) + diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index c95cfba85..696c177fc 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -24,7 +24,6 @@ * benchmark impementation in https://github.com/hpcg-benchmark/hpcg. 
*/ -#include #include #include #include @@ -68,7 +67,7 @@ template< typename T > void print_norm( const grb::Vector< T > &r, const char * // default simulation parameters, set as in reference HPCG // users can input different ones via the cmd line constexpr size_t PHYS_SYSTEM_SIZE_DEF = 16UL; -constexpr size_t PHYS_SYSTEM_SIZE_MIN = 4UL; +constexpr size_t PHYS_SYSTEM_SIZE_MIN = 2UL; constexpr size_t MAX_COARSENING_LEVELS = 3UL; constexpr size_t MAX_ITERATIONS_DEF = 56UL; constexpr size_t SMOOTHER_STEPS_DEF = 1; @@ -289,6 +288,12 @@ static void build_3d_system( // and track the time for diagnostics purposes for( size_t i = 0; i < mg_generators.size(); i++) { MASTER_PRINT( pid, "SYSTEM LEVEL " << i << std::endl ); + auto& sizes = mg_generators[ i ].get_generator().get_sizes(); + MASTER_PRINT( pid, " sizes: " ); + for( size_t s = 0; s < DIMS - 1; s++ ) { + MASTER_PRINT( pid,sizes[ s ] << " x " ); + } + MASTER_PRINT( pid, sizes[ DIMS - 1 ] << std::endl ); MASTER_PRINT( pid, " populating system matrix: " ); timer.reset(); grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], system_levels.at(i)->A ); From 012f3e82cd2e668a418ea070d2faf7b5a96151f7 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Wed, 23 Nov 2022 16:48:01 +0100 Subject: [PATCH 14/28] adding average coarsener and invoking it from the benchmark (on user's choice) --- .../algorithms/hpcg/average_coarsener.hpp | 349 ++++++++++++++++++ .../algorithms/hpcg/system_building_utils.hpp | 53 ++- tests/smoke/hpcg.cpp | 13 +- 3 files changed, 409 insertions(+), 6 deletions(-) create mode 100644 include/graphblas/algorithms/hpcg/average_coarsener.hpp diff --git a/include/graphblas/algorithms/hpcg/average_coarsener.hpp b/include/graphblas/algorithms/hpcg/average_coarsener.hpp new file mode 100644 index 000000000..6af5e5ff7 --- /dev/null +++ b/include/graphblas/algorithms/hpcg/average_coarsener.hpp @@ -0,0 +1,349 @@ + +/* + * Copyright 2022 Huawei Technologies Co., Ltd. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file average_coarsener.hpp + * @author Alberto Scolari (alberto.scolari@huawei.com) + * Utilities to build the coarsening matrix for an HPCG simulation. + */ + +#ifndef _H_GRB_ALGORITHMS_AVERAGE_COARSENER +#define _H_GRB_ALGORITHMS_AVERAGE_COARSENER + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace grb { + namespace algorithms { + + // forward declaration + template< + size_t DIMS, + typename CoordType, + typename ValueType + > class AverageCoarsenerBuilder; + + /** + * Iterator class to generate the coarsening matrix that averages over the elements of the finer + * domain corresponding to the element of the coarser domain. + * + * The coarsening matrix averages \b all elements that are coarsened into one. + * + * This coarsening method requires some computation but should be relatively robust to noise + * or to partitioning strategies to parallelize the smoother (usually run before coarsening). + * + * This iterator is random-access. 
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 / + * + */ + template< + size_t DIMS, + typename CoordType, + typename ValueType + > struct AverageGeneratorIterator { + + friend AverageCoarsenerBuilder< DIMS, CoordType, ValueType >; + + using RowIndexType = CoordType; ///< numeric type of rows + using ColumnIndexType = CoordType; + using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; + using LinearSystemIterType = typename LinearSystemType::Iterator; + using SelfType = AverageGeneratorIterator< DIMS, CoordType, ValueType >; + using ArrayType = std::array< CoordType, DIMS >; + + struct _ValueGenerator { + + friend SelfType; + + _ValueGenerator( + RowIndexType i, + ColumnIndexType j, + ValueType value + ) noexcept : + _i( i ), + _j( j ), + _value( value ) + {} + + _ValueGenerator( const _ValueGenerator & ) = default; + + _ValueGenerator & operator=( const _ValueGenerator & ) = default; + + inline RowIndexType i() const { return _i; } + inline ColumnIndexType j() const { return _j; } + inline ValueType v() const { return _value; } + + private: + RowIndexType _i; + ColumnIndexType _j; + ValueType _value; + }; + + // interface for std::random_access_iterator + using iterator_category = std::random_access_iterator_tag; + using value_type = _ValueGenerator; + using pointer = const value_type; + using reference = const value_type&; + using difference_type = typename LinearSystemIterType::difference_type; + + AverageGeneratorIterator( const SelfType &o ) = default; + + AverageGeneratorIterator( SelfType &&o ) = default; + + SelfType & operator=( const SelfType & ) = default; + + SelfType & operator=( SelfType && ) = default; + + /** + * Advances \c this by 1 in constant time. 
+ */ + SelfType & operator++() noexcept { + (void) ++_subspace_iter; + size_t subspace_position = _subspace_iter->get_linear_position(); + // std::cout << "subspace_position " << subspace_position << std::endl; + if( subspace_position == _num_neighbors ) { + (void) ++_sys_iter; + _subspace_iter = _finer_subspace->begin(); + } + update_coords(); + return *this; + } + + /** + * Advances \c this by \p offset in constant time. + */ + SelfType & operator+=( size_t offset ) { + CoordType sub_offset = _subspace_iter->get_linear_position() + offset; + std::ldiv_t res = std::ldiv( sub_offset, _num_neighbors ); + _sys_iter += res.quot; + _subspace_iter = _finer_subspace->begin(); + _subspace_iter += res.rem; + update_coords(); + return *this; + } + + /** + * Computes the difference between \c this and \p o as integer. + */ + difference_type operator-( const SelfType &o ) const { + return this->_sys_iter - o._sys_iter; + } + + /** + * Returns whether \c this and \p o differ. + */ + bool operator!=( const SelfType &o ) const { + return this->_sys_iter != o._sys_iter; + } + + /** + * Returns whether \c this and \p o are equal. + */ + bool operator==( const SelfType &o ) const { + return ! this->operator!=( o ); + } + + reference operator*() const { + return _val; + } + + pointer operator->() const { + return &_val; + } + + /** + * Returns the current row, within the coarser system. + */ + inline RowIndexType i() const { + return _val.i(); + } + + /** + * Returns the current column, within the finer system. + */ + inline ColumnIndexType j() const { + return _val.j(); + } + + /** + * Returns always 1, as the coarsening keeps the same value. 
+ */ + inline ValueType v() const { + return _val.v(); + } + + private: + const LinearSystemType *_lin_sys; + const LinearSystemType *_finer_subspace; + const ArrayType *_steps; + CoordType _num_neighbors; + LinearSystemIterType _sys_iter; + LinearSystemIterType _subspace_iter; + value_type _val; + + /** + * Construct a new AverageGeneratorIterator object starting from the LinearizedNDimSystem + * object \p system describing the \b coarser system and the \b ratios \p steps between each finer and + * the corresponding corser dimension. + * + * @param system LinearizedNDimSystem object describing the coarser system + * @param finer_subspace LinearizedNDimSystem object describing the subspace of each element + * in the finer system + * @param steps ratios per dimension between finer and coarser system + */ + AverageGeneratorIterator( + const LinearSystemType &system, + const LinearSystemType &finer_subspace, + const ArrayType &steps + ) noexcept : + _lin_sys( &system ), + _finer_subspace( &finer_subspace ), + _steps( &steps ), + _num_neighbors( std::accumulate( steps.cbegin(), steps.cend(), 1UL, std::multiplies< CoordType >() ) ), + _sys_iter( system.begin() ), + _subspace_iter( finer_subspace.begin() ), + _val( 0, 0, static_cast< ValueType >( 1 ) / static_cast< ValueType >( _num_neighbors ) ) + { + update_coords(); + } + + void update_coords() noexcept { + _val._i = _sys_iter->get_linear_position(); + _val._j = coarse_rows_to_finer_col(); + } + + /** + * Returns the row coordinates converted to the finer system, to compute + * the column value. 
+ */ + ColumnIndexType coarse_rows_to_finer_col() const noexcept { + ColumnIndexType finer = 0; + ColumnIndexType s = 1; + for( size_t i = 0; i < DIMS; i++ ) { + finer += s * _subspace_iter->get_position()[ i ]; + s *= (*_steps)[ i ]; + finer += s * _sys_iter->get_position()[ i ]; + s *= _lin_sys->get_sizes()[ i ]; + } + return finer; + } + }; + + /** + * Builder object to create iterators that generate an averaging-coarsening matrix. + * + * It is a facility to generate beginning and end iterators and abstract the logic away from users. + * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample + * the finer value) + */ + template< + size_t DIMS, + typename CoordType, + typename ValueType + > class AverageCoarsenerBuilder { + public: + using ArrayType = std::array< CoordType, DIMS >; + using Iterator = AverageGeneratorIterator< DIMS, CoordType, ValueType >; + using SelfType = AverageCoarsenerBuilder< DIMS, CoordType, ValueType >; + + /** + * Construct a new AverageCoarsenerBuilder object from the sizes of finer system + * and those of the coarser system; finer sizes must be an exact multiple of coarser sizes, + * otherwise an exception is raised. 
+ */ + AverageCoarsenerBuilder( + const ArrayType &_finer_sizes, + const ArrayType &_coarser_sizes + ) : + system( _coarser_sizes.begin(), _coarser_sizes.end() ), + _finer_subspace( _coarser_sizes.cbegin(), _coarser_sizes.cend() ), + steps( DIMS ) + { + for( size_t i = 0; i < DIMS; i++ ) { + // finer size MUST be an exact multiple of coarser_size + std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); + if( ratio.quot < 2 || ratio.rem != 0 ) { + throw std::invalid_argument( + std::string( "finer size of dimension " ) + std::to_string( i ) + + std::string( "is not an exact multiple of coarser size" ) + ); + } + steps[ i ] = ratio.quot; + } + _finer_subspace.retarget( steps ); + } + + AverageCoarsenerBuilder( const SelfType & ) = delete; + + AverageCoarsenerBuilder( SelfType && ) = delete; + + SelfType & operator=( const SelfType & ) = delete; + + SelfType & operator=( SelfType && ) = delete; + + /** + * Returns the size of the finer system, i.e. its number of elements. + */ + size_t system_size() const { + return system.system_size(); + } + + /** + * Produces a beginning iterator to generate the coarsening matrix. + */ + Iterator make_begin_iterator() { + return Iterator( system, _finer_subspace, steps ); + } + + /** + * Produces an end iteratormto stop the generation of the coarsening matrix. + */ + Iterator make_end_iterator() { + Iterator result( system, _finer_subspace, steps ); + result += ( system_size() * _finer_subspace.system_size() ); // do not trigger boundary checks + // ++result; + return result; + } + + private: + const grb::utils::multigrid::LinearizedNDimSystem< CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; + grb::utils::multigrid::LinearizedNDimSystem< CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > _finer_subspace; + + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be + //// incremented when incrementing the row coordinates; it is the ratio between + //// #finer_sizes and row_generator#physical_sizes + }; + + } // namespace algorithms +} // namespace grb +#endif // _H_GRB_ALGORITHMS_AVERAGE_COARSENER diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index c0b522521..9503f77ff 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -39,6 +39,7 @@ #include "system_builder.hpp" #include "single_point_coarsener.hpp" +#include "average_coarsener.hpp" #include "greedy_coloring.hpp" namespace grb { @@ -168,13 +169,22 @@ namespace grb { * This function takes care of parallelizing the generation by using a random-access iterator * to generate the coarsening matrix and by distributing the generation across nodes * of a distributed system (if any).
+ * @tparam IterBuilderType type of the matrix builder, either SinglePointCoarsenerBuilder + * or AverageCoarsenerBuilder + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam NonzeroType type of the nonzero + * @param finer_system_generator object generating the finer system + * @param coarser_system_generator object generating the coarser system + * @param coarsener structure with the matrix to populate */ template< + typename IterBuilderType, size_t DIMS, typename CoordType, typename IOType, typename NonzeroType - > grb::RC hpcg_populate_coarsener( + > grb::RC hpcg_populate_coarsener_any_builder( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, CoarseningData< IOType, NonzeroType > &coarsener @@ -201,15 +211,50 @@ namespace grb { " with rows == and cols == " ); } - using gen_t = typename grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType >; - gen_t coarsener_builder( finer_sizes, coarser_sizes ); - typename gen_t::Iterator begin( coarsener_builder.make_begin_iterator() ), + IterBuilderType coarsener_builder( finer_sizes, coarser_sizes ); + typename IterBuilderType::Iterator begin( coarsener_builder.make_begin_iterator() ), end( coarsener_builder.make_end_iterator() ); grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), coarsener_builder.system_size(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } + /** + * Populates a coarsener that samples one element every \a 2^DIMS .
+ */ + template< + size_t DIMS, + typename CoordType, + typename IOType, + typename NonzeroType + > grb::RC hpcg_populate_coarsener( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, + CoarseningData< IOType, NonzeroType > &coarsener + ) { + return hpcg_populate_coarsener_any_builder< + grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType > > + ( finer_system_generator, coarser_system_generator, coarsener ); + } + + /** + * Populates a coarsener that averages over \a 2^DIMS elements. + */ + template< + size_t DIMS, + typename CoordType, + typename IOType, + typename NonzeroType + > grb::RC hpcg_populate_coarsener_avg( + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, + CoarseningData< IOType, NonzeroType > &coarsener + ) { + return hpcg_populate_coarsener_any_builder< + grb::algorithms::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > > + ( finer_system_generator, coarser_system_generator, coarsener ); + } + namespace internal { /** diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 696c177fc..7edd9a1d4 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -119,6 +119,7 @@ struct simulation_input { size_t nx, ny, nz; size_t max_coarsening_levels; // solver options + bool use_average_coarsener; size_t inner_test_repetitions; size_t max_iterations; size_t smoother_steps; @@ -311,7 +312,11 @@ static void build_3d_system( if( i > 0 ) { MASTER_PRINT( pid, " populating coarsening data: " ); timer.reset(); - rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + if( !in.use_average_coarsener ) { + rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], 
*coarsener_levels[ i - 1 ] ); + } else { + rc = hpcg_populate_coarsener_avg( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + } time = timer.time(); ASSERT_RC_SUCCESS( rc ); MASTER_PRINT( pid, " time (ms) " << time << std::endl ) @@ -443,6 +448,7 @@ int main( int argc, char ** argv ) { thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; thcout << "System size z: " << sim_in.nz << std::endl; + thcout << "Coarsener: " << (sim_in.use_average_coarsener ? "average" : "single point sampler" ) << std::endl; thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; thcout << "Test repetitions: " << sim_in.inner_test_repetitions << std::endl; thcout << "Max iterations: " << sim_in.max_iterations << std::endl; @@ -453,6 +459,7 @@ int main( int argc, char ** argv ) { thcout << "Test outer iterations: " << test_outer_iterations << std::endl; thcout << "Maximum norm for residual: " << max_diff_norm << std::endl; + // the output struct struct output out; @@ -535,7 +542,9 @@ static void parse_arguments( .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, - "on each iteration, print more statistics" ); + "on each iteration, print more statistics" ) + .add_option( "--use-average-coarsener", sim_in.use_average_coarsener, false, + "coarsen by averaging instead of by sampling a single point (slower, but more accurate)" ); parser.parse( argc, argv ); From 2f7c223f0ef48f392f75d02a941c5dd74eec36db Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 24 Nov 2022 15:46:36 +0100 Subject: [PATCH 15/28] replacing eWiseMulAdd() with eWiseMul() + eWiseApply() --- .../algorithms/multigrid/multigrid_cg.hpp | 64 ++++++++++--------- 1 file changed, 34 insertions(+), 30 deletions(-) diff --git 
a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 735f87d81..2738864ef 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -152,13 +152,11 @@ namespace grb { MultiGridrunnerType &multigrid_runner, CGOutInfo< ResidualType > &out_info ) { - ResidualType alpha; - - const grb::Matrix< NonzeroType > &A = grid_base.A; + const grb::Matrix< NonzeroType > &A = grid_base.A; // system matrix grb::Vector< IOType > &r = grid_base.r; // residual vector grb::Vector< IOType > &z = grid_base.z; // pre-conditioned residual vector - grb::Vector< IOType > &x = cg_data.x; - const grb::Vector< InputType > &b = cg_data.b; + grb::Vector< IOType > &x = cg_data.x; // initial (and final) solution + const grb::Vector< InputType > &b = cg_data.b; // right-side value grb::Vector< IOType > &p = cg_data.p; // direction vector grb::Vector< IOType > &Ap = cg_data.u; // temp vector grb::RC ret = SUCCESS; @@ -169,15 +167,17 @@ namespace grb { ret = ret ? ret : grb::set( p, io_zero ); ret = ret ? ret : grb::set( p, x ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, cg_opts.ring ); // Ap = A * x + // Ap = A * x + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, cg_opts.ring ); assert( ret == SUCCESS ); - - ret = ret ? ret : grb::eWiseApply( r, b, Ap, cg_opts.minus ); // r = b - Ap; + // r = b - Ap + ret = ret ? ret : grb::eWiseApply( r, b, Ap, cg_opts.minus ); assert( ret == SUCCESS ); const ResidualType residual_zero = cg_opts.ring.template getZero< ResidualType >(); ResidualType norm_residual = residual_zero; - ret = ret ? ret : grb::dot( norm_residual, r, r, cg_opts.ring ); // norm_residual = r' * r; + // norm_residual = r' * r + ret = ret ? 
ret : grb::dot( norm_residual, r, r, cg_opts.ring ); assert( ret == SUCCESS ); // compute sqrt to avoid underflow @@ -196,7 +196,6 @@ namespace grb { DBG_print_norm( Ap, "start Ap" ); DBG_print_norm( r, "start r" ); #endif - do { #ifdef HPCG_PRINT_STEPS DBG_println( "========= iteration " << iter << " =========" ); @@ -219,58 +218,63 @@ namespace grb { #ifdef HPCG_PRINT_STEPS DBG_print_norm( z, "initial z" ); #endif - - ResidualType pAp; - if( iter == 0 ) { ret = ret ? ret : grb::set( p, z ); // p = z; assert( ret == SUCCESS ); - ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); // r_dot_z = r' * z; assert( ret == SUCCESS ); } else { old_r_dot_z = r_dot_z; - + // r_dot_z = r' * z r_dot_z = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); // r_dot_z = r' * z; + ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); assert( ret == SUCCESS ); beta = r_dot_z / old_r_dot_z; - ret = ret ? ret : grb::set( Ap, io_zero ); // Ap = 0; - ret = ret ? ret : grb::eWiseMulAdd( Ap, beta, p, z, cg_opts.ring ); // Ap += beta * p + z; - std::swap( Ap, p ); // p = Ap; + // Ap = 0 + ret = ret ? ret : grb::set( Ap, io_zero ); + assert( ret == SUCCESS ); + // Ap += beta * p + ret = ret ? ret : grb::eWiseMul( Ap, beta, p, cg_opts.ring ); + assert( ret == SUCCESS ); + // Ap = Ap + z + ret = ret ? ret : grb::eWiseApply( Ap, Ap, z, cg_opts.ring.getAdditiveOperator() ); + assert( ret == SUCCESS ); + // p = Ap + std::swap( Ap, p ); assert( ret == SUCCESS ); } #ifdef HPCG_PRINT_STEPS DBG_print_norm( p, "middle p" ); #endif - + // Ap = A * p ret = ret ? ret : grb::set( Ap, io_zero ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, cg_opts.ring ); // Ap = A * p; + ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( Ap, "middle Ap" ); #endif - pAp = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? 
ret : grb::dot( pAp, Ap, p, cg_opts.ring ); // pAp = p' * Ap + // pAp = p' * Ap + ResidualType pAp = cg_opts.ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot( pAp, Ap, p, cg_opts.ring ); assert( ret == SUCCESS ); - alpha = r_dot_z / pAp; - - ret = ret ? ret : grb::eWiseMul( x, alpha, p, cg_opts.ring ); // x += alpha * p; + ResidualType alpha = r_dot_z / pAp; + // x += alpha * p + ret = ret ? ret : grb::eWiseMul( x, alpha, p, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( x, "end x" ); #endif - - ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, cg_opts.ring ); // r += - alpha * Ap; + // r += - alpha * Ap + ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( r, "end r" ); #endif - + // residual = r' * r norm_residual = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( norm_residual, r, r, cg_opts.ring ); // residual = r' * r; + ret = ret ? 
ret : grb::dot( norm_residual, r, r, cg_opts.ring ); assert( ret == SUCCESS ); norm_residual = std::sqrt( norm_residual ); From 216b99612f67cebdf4b621aaec2519475f4296e2 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 24 Nov 2022 16:07:49 +0100 Subject: [PATCH 16/28] logging per-iteration MG time and residual separately --- .../algorithms/multigrid/multigrid_cg.hpp | 21 ++++++------------- .../multigrid/multigrid_v_cycle.hpp | 13 +++++++++++- tests/smoke/hpcg.cpp | 3 ++- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 2738864ef..4c4e0b0cf 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -36,7 +36,6 @@ #include #include -#include #include "multigrid_data.hpp" @@ -67,7 +66,6 @@ namespace grb { grb::Vector< IOType > x; ///< system solution being refined over the iterations: it us up to the user ///< to set the initial solution value to something meaningful - /** * Construct a new \c MultiGridCGData object by building its vectors with size \p sys_size. 
*/ @@ -98,7 +96,7 @@ namespace grb { ///< and the result achieved so far returned ResidualType tolerance; ///< ratio between initial residual and current residual that halts the solver ///< if reached, for the solution is to be considered "good enough" - bool print_iter_stats; ///< whether to print information on the multi-grid and the residual on each iteration + bool print_iter_residual; ///< whether to print information on the multi-grid and the residual on each iteration Ring ring; ///< algebraic ring to be used Minus minus; ///< minus operator to be used }; @@ -189,8 +187,6 @@ namespace grb { ResidualType old_r_dot_z = residual_zero, r_dot_z = residual_zero, beta = residual_zero; size_t iter = 0; - grb::utils::Timer timer; - #ifdef HPCG_PRINT_STEPS DBG_print_norm( p, "start p" ); DBG_print_norm( Ap, "start Ap" ); @@ -200,17 +196,12 @@ namespace grb { #ifdef HPCG_PRINT_STEPS DBG_println( "========= iteration " << iter << " =========" ); #endif + if( cg_opts.print_iter_residual ) { + std::cout << "iteration " << iter; + } if( cg_opts.with_preconditioning ) { - if( cg_opts.print_iter_stats ) { - timer.reset(); - } ret = ret ? ret : multigrid_runner( grid_base ); assert( ret == SUCCESS ); - if( cg_opts.print_iter_stats ) { - double duration = timer.time(); - std::cout << "iteration, pre-conditioner: " << iter << "," - << duration << std::endl; - } } else { ret = ret ? 
ret : grb::set( z, r ); // z = r; assert( ret == SUCCESS ); @@ -279,8 +270,8 @@ namespace grb { norm_residual = std::sqrt( norm_residual ); - if( cg_opts.print_iter_stats ) { - std::cout << "iteration, residual: " << iter << "," << norm_residual << std::endl; + if( cg_opts.print_iter_residual ) { + std::cout << " residual " << norm_residual << std::endl; } ++iter; diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 963da74d5..f6dbfbd03 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -33,6 +33,7 @@ #include #include +#include #include "multigrid_data.hpp" @@ -201,6 +202,8 @@ namespace grb { MGSmootherType smoother_runner; ///< object to run the smoother CoarsenerType coarsener_runner; ///< object to run the coarsener std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) + bool print_duration = false; ///< whether to print the duration of a full multi-grid call + grb::utils::Timer timer; Ring ring; ///< algebraic ring Minus minus; ///< minus operator @@ -238,11 +241,19 @@ namespace grb { * Operator to invoke a full multi-grid run starting from the given level. 
*/ inline grb::RC operator()( MultiGridInputType &system ) { - return multi_grid< IOType, NonzeroType, __unique_ptr_extractor, + if( print_duration ) { + timer.reset(); + } + grb::RC ret = multi_grid< IOType, NonzeroType, __unique_ptr_extractor, MGSmootherType, CoarsenerType, Ring, Minus >( __unique_ptr_extractor( system_levels.begin() += system.level ), __unique_ptr_extractor( system_levels.end() ), smoother_runner, coarsener_runner, ring, minus ); + if( print_duration ) { + double duration = timer.time(); + std::cout << " pre-conditioner (ms) "<< duration; + } + return ret; } }; diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 7edd9a1d4..bba6e9c2f 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -391,7 +391,8 @@ void grbProgram( const simulation_input & in, struct output & out ) { // restore CG options to user-given values hpcg_runner.cg_opts.max_iterations = in.max_iterations; - hpcg_runner.cg_opts.print_iter_stats = in.print_iter_stats; + hpcg_runner.cg_opts.print_iter_residual = in.print_iter_stats; + mg_runner.print_duration = in.print_iter_stats; MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning solver..." << std::endl ); out.inner_test_repetitions = 0; out.times.useful = 0.0; From ffa7ba321adce69ec691032ce743e17a561a48c7 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 25 Nov 2022 14:55:18 +0100 Subject: [PATCH 17/28] allowing colors to start from highest first during greedy assignment --- .../algorithms/hpcg/greedy_coloring.hpp | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp index 5b6f80b2c..5519a6504 100644 --- a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp +++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp @@ -53,8 +53,14 @@ namespace grb { * most sizes, as the constants in front of this algorithms are very small. 
Implementing a distributed * coloring algorithm is anyway out of the scope of this prototype. * + * Colors are by default assigned in a greedy way from the lowest one up, making this coloring scheme very + * regular: close elements tend to have similar colors. This can be changed with \p lowest_color_first + * \c = \c false , which assigns colors from the highest one. This may avoid "destructive interference" + * with following coarsening schemes. + * * @tparam DIMS dimensions of the system * @tparam CoordType type of the coordinates + * @tparam lowest_color_first start greedy assignment of colors from lowest first * @param[in] system generator for an \p DIMS - dimesional system with halo * @param[out] row_colors if \p reorder_rows_per_color is false, stores the color of each row; * if \p reorder_rows_per_color is true, stores the new position of each row, so that rows @@ -66,7 +72,8 @@ namespace grb { */ template< size_t DIMS, - typename CoordType + typename CoordType, + bool lowest_color_first = true > void hpcg_greedy_color_ndim_system( const grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType > &system, std::vector< CoordType > &row_colors, @@ -112,11 +119,24 @@ namespace grb { if( currentlyAssigned < totalColors ) { // if there is at least one color left to use, look for it // smallest possible - for( CoordType j = 0; j < totalColors; ++j ) { - if( !assigned[ j ] ) { - // if no neighbor with this color, use it for this row - row_colors[ curRow ] = j; - break; + if( lowest_color_first ) { + // here, assign colors greedily starting from the lowest available one + for( CoordType j = 0; j < totalColors; ++j ) { + if( !assigned[ j ] ) { + // if no neighbor with this color, use it for this row + row_colors[ curRow ] = j; + break; + } + } + } else { + // here, assign colors greedily starting from the highest available one + for( CoordType j = totalColors; j > 0; --j ) { + CoordType color = j - 1; + if( !assigned[ color ] ) { + // if no neighbor with this 
color, use it for this row + row_colors[ curRow ] = color; + break; + } } } } else { From 9c3b2bae5f0caa3022cb9309e61b4e9bdd5bfb73 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 28 Nov 2022 17:16:03 +0100 Subject: [PATCH 18/28] adding descriptor template parameter all over to MG kernels; adding dense descriptor to HPCG builder; moving zero'ing of vector in RBGS smoother to main caller --- include/graphblas/algorithms/hpcg/hpcg.hpp | 28 ++++++------ .../algorithms/multigrid/multigrid_cg.hpp | 43 ++++++++++-------- .../multigrid/multigrid_v_cycle.hpp | 21 +++++---- .../multigrid/red_black_gauss_seidel.hpp | 44 ++++++++++++------- .../multigrid/single_matrix_coarsener.hpp | 29 ++++++------ tests/smoke/hpcg.cpp | 5 ++- 6 files changed, 100 insertions(+), 70 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp index c4598323a..b4884f4e1 100644 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ b/include/graphblas/algorithms/hpcg/hpcg.hpp @@ -42,6 +42,7 @@ namespace grb { // simply "assemble" types template< + Descriptor descr, typename IOType, typename ResidualType, typename NonzeroType, @@ -49,11 +50,11 @@ namespace grb { class Ring, class Minus > using HPCGRunnerType = MultiGridCGRunner< IOType, NonzeroType, InputType, ResidualType, - MultiGridRunner< IOType, NonzeroType, - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring >, - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus >, - Ring, Minus >, - Ring, Minus + MultiGridRunner< + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr >, + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr >, + IOType, NonzeroType, Ring, Minus, descr + >, Ring, Minus, descr >; /** @@ -63,26 +64,27 @@ namespace grb { * @param[in] smoother_steps how many times the smoother should run (both pre- and post-smoothing) */ template< + Descriptor descr, typename IOType, typename ResidualType, typename NonzeroType, typename InputType, class Ring, 
class Minus - > HPCGRunnerType< IOType, ResidualType, NonzeroType, InputType, Ring, Minus > + > HPCGRunnerType< descr, IOType, ResidualType, NonzeroType, InputType, Ring, Minus > build_hpcg_runner( size_t smoother_steps ) { - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus > coarsener; - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring > + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr > coarsener; + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr > smoother( { smoother_steps, smoother_steps, 1UL, {}, Ring() } ); - MultiGridRunner< IOType, NonzeroType, - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring >, - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus >, - Ring, Minus + MultiGridRunner< + RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr >, + SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr >, + IOType, NonzeroType, Ring, Minus, descr > mg_runner( std::move( smoother ), std::move( coarsener ) ); - return HPCGRunnerType< IOType, ResidualType, NonzeroType, InputType, Ring, Minus >( + return HPCGRunnerType< descr, IOType, ResidualType, NonzeroType, InputType, Ring, Minus >( std::move( mg_runner ) ); } diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 4c4e0b0cf..c517c8cc4 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -120,6 +120,7 @@ namespace grb { * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning * the failure code. 
* + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam ResidualType type of the residual norm * @tparam NonzeroType type of matrix values @@ -136,6 +137,7 @@ namespace grb { * @return grb::RC SUCCESS in case of succesful run */ template< + Descriptor descr, typename IOType, typename ResidualType, typename NonzeroType, @@ -166,16 +168,16 @@ namespace grb { ret = ret ? ret : grb::set( p, x ); // Ap = A * x - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, x, cg_opts.ring ); + ret = ret ? ret : grb::mxv< descr >( Ap, A, x, cg_opts.ring ); assert( ret == SUCCESS ); // r = b - Ap - ret = ret ? ret : grb::eWiseApply( r, b, Ap, cg_opts.minus ); + ret = ret ? ret : grb::eWiseApply< descr >( r, b, Ap, cg_opts.minus ); assert( ret == SUCCESS ); const ResidualType residual_zero = cg_opts.ring.template getZero< ResidualType >(); ResidualType norm_residual = residual_zero; // norm_residual = r' * r - ret = ret ? ret : grb::dot( norm_residual, r, r, cg_opts.ring ); + ret = ret ? ret : grb::dot< descr >( norm_residual, r, r, cg_opts.ring ); assert( ret == SUCCESS ); // compute sqrt to avoid underflow @@ -203,33 +205,36 @@ namespace grb { ret = ret ? ret : multigrid_runner( grid_base ); assert( ret == SUCCESS ); } else { - ret = ret ? ret : grb::set( z, r ); // z = r; + // z = r + ret = ret ? ret : grb::set( z, r ); assert( ret == SUCCESS ); } #ifdef HPCG_PRINT_STEPS DBG_print_norm( z, "initial z" ); #endif if( iter == 0 ) { - ret = ret ? ret : grb::set( p, z ); // p = z; + // p = z + ret = ret ? ret : grb::set< descr >( p, z ); assert( ret == SUCCESS ); - ret = ret ? ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); // r_dot_z = r' * z; + // r_dot_z = r' * z + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, cg_opts.ring ); assert( ret == SUCCESS ); } else { old_r_dot_z = r_dot_z; // r_dot_z = r' * z r_dot_z = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? 
ret : grb::dot( r_dot_z, r, z, cg_opts.ring ); + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, cg_opts.ring ); assert( ret == SUCCESS ); beta = r_dot_z / old_r_dot_z; // Ap = 0 - ret = ret ? ret : grb::set( Ap, io_zero ); + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); assert( ret == SUCCESS ); // Ap += beta * p - ret = ret ? ret : grb::eWiseMul( Ap, beta, p, cg_opts.ring ); + ret = ret ? ret : grb::eWiseMul< descr >( Ap, beta, p, cg_opts.ring ); assert( ret == SUCCESS ); // Ap = Ap + z - ret = ret ? ret : grb::eWiseApply( Ap, Ap, z, cg_opts.ring.getAdditiveOperator() ); + ret = ret ? ret : grb::eWiseApply< descr >( Ap, Ap, z, cg_opts.ring.getAdditiveOperator() ); assert( ret == SUCCESS ); // p = Ap std::swap( Ap, p ); @@ -239,33 +244,33 @@ namespace grb { DBG_print_norm( p, "middle p" ); #endif // Ap = A * p - ret = ret ? ret : grb::set( Ap, io_zero ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( Ap, A, p, cg_opts.ring ); + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); + ret = ret ? ret : grb::mxv< descr >( Ap, A, p, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( Ap, "middle Ap" ); #endif // pAp = p' * Ap ResidualType pAp = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( pAp, Ap, p, cg_opts.ring ); + ret = ret ? ret : grb::dot< descr >( pAp, Ap, p, cg_opts.ring ); assert( ret == SUCCESS ); ResidualType alpha = r_dot_z / pAp; // x += alpha * p - ret = ret ? ret : grb::eWiseMul( x, alpha, p, cg_opts.ring ); + ret = ret ? ret : grb::eWiseMul< descr >( x, alpha, p, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( x, "end x" ); #endif // r += - alpha * Ap - ret = ret ? ret : grb::eWiseMul( r, -alpha, Ap, cg_opts.ring ); + ret = ret ? 
ret : grb::eWiseMul< descr >( r, -alpha, Ap, cg_opts.ring ); assert( ret == SUCCESS ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( r, "end r" ); #endif // residual = r' * r norm_residual = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot( norm_residual, r, r, cg_opts.ring ); + ret = ret ? ret : grb::dot< descr >( norm_residual, r, r, cg_opts.ring ); assert( ret == SUCCESS ); norm_residual = std::sqrt( norm_residual ); @@ -299,6 +304,7 @@ namespace grb { * @tparam MultiGridrunnerType type for the multi-grid runner object * @tparam Ring algebraic ring type * @tparam Minus minus operator + * @tparam descr descriptors with statically-known data for computation and containers */ template< typename IOType, @@ -307,7 +313,8 @@ namespace grb { typename ResidualType, typename MultiGridRunnerType, class Ring, - class Minus + class Minus, + Descriptor descr = descriptors::no_operation > struct MultiGridCGRunner { using HPCGInputType = MultiGridCGData< IOType, NonzeroType, InputType >; @@ -348,7 +355,7 @@ namespace grb { MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, CGOutInfo< ResidualType > &out_info ) { - return multigrid_conjugate_gradient( cg_data, cg_opts, grid_base, mg_runner, out_info ); + return multigrid_conjugate_gradient< descr >( cg_data, cg_opts, grid_base, mg_runner, out_info ); } }; diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index f6dbfbd03..177027f3e 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -58,6 +58,7 @@ namespace grb { * Failuers of GraphBLAS operations are handled by immediately stopping the execution * and returning the failure code. 
* + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam MGSysIterType type of the iterator across grid levels @@ -78,6 +79,7 @@ namespace grb { * unsuccessful operation otherwise */ template < + Descriptor descr, typename IOType, typename NonzeroType, typename MGSysIterType, @@ -107,7 +109,7 @@ namespace grb { #endif // clean destination vector - ret = ret ? ret : grb::set( finer_system.z, ring. template getZero< IOType >() ); + ret = ret ? ret : grb::set< descr >( finer_system.z, ring. template getZero< IOType >() ); #ifdef HPCG_PRINT_STEPS DBG_print_norm( finer_system.r, "initial r" ); #endif @@ -136,7 +138,7 @@ namespace grb { DBG_print_norm( coarser_system.r, "coarse r" ); #endif - ret = ret ? ret : multi_grid< IOType, NonzeroType, MGSysIterType, + ret = ret ? ret : multi_grid< descr, IOType, NonzeroType, MGSysIterType, MGSmootherType, CoarsenerType, Ring, Minus >( mgiter_begin, mgiter_end, smoother, coarsener, ring, minus ); assert( ret == SUCCESS ); @@ -165,23 +167,24 @@ namespace grb { * It is built by transferring into it the state of both the smoother and the coarsener, * in order to avoid use-after-free issues. 
* - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam MGSysIterType type of the iterator across grid levels * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various * smoothing steps * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values * and prolongation * @tparam Ring the ring of algebraic operators and zero values * @tparam Minus the minus operator for subtractions + * @tparam descr descriptors with statically-known data for computation and containers */ template< - typename IOType, - typename NonzeroType, typename MGSmootherType, typename CoarsenerType, + typename IOType, + typename NonzeroType, class Ring, - class Minus + class Minus, + Descriptor descr = descriptors::no_operation > struct MultiGridRunner { static_assert( std::is_default_constructible< Ring >::value, @@ -244,7 +247,7 @@ namespace grb { if( print_duration ) { timer.reset(); } - grb::RC ret = multi_grid< IOType, NonzeroType, __unique_ptr_extractor, + grb::RC ret = multi_grid< descr, IOType, NonzeroType, __unique_ptr_extractor, MGSmootherType, CoarsenerType, Ring, Minus >( __unique_ptr_extractor( system_levels.begin() += system.level ), __unique_ptr_extractor( system_levels.end() ), diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 97d0c80e4..3193b46fe 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -64,6 +64,7 @@ namespace grb { /** * Runs a single step of Red-Black Gauss-Seidel for a specific color. 
* + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators zero-values @@ -79,6 +80,7 @@ namespace grb { * unsuccessful operation otherwise */ template< + Descriptor descr, typename IOType, typename NonzeroType, class Ring @@ -92,10 +94,12 @@ namespace grb { const Ring & ring ) { RC ret = SUCCESS; - ret = ret ? ret : grb::set( smoother_temp, ring. template getZero< IOType >() ); - // acc_temp[mask] = A[mask] * x[mask] - ret = ret ? ret : grb::mxv< grb::descriptors::safe_overlap >( smoother_temp, color_mask, A, x, ring ); + // smoother_temp[color_mask] = A[color_mask] * x[color_mask] + // use the structural descriptors, assuming ONLY the values of the current color are set + // note that if this assumption does not hold, also the following eWiseLambda() is wrong + ret = ret ? ret : grb::mxv< grb::descriptors::safe_overlap | grb::descriptors::structural >( + smoother_temp, color_mask, A, x, ring ); assert( ret == SUCCESS ); // TODO internal issue #201 @@ -106,13 +110,10 @@ namespace grb { grb::eWiseLambda( [ &x, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { // if the mask was properly initialized, the check on the mask value is unnecessary; - // nonetheless, it is left not to violate the semantics of RBGS in case also the false values - // had been initialized (in which case the check is fundamental); if only true values were initialized, - // we expect CPU branch prediction to neutralize the branch cost // if( color_mask[ i ] ) { - IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; - x[ i ] = v / d; + IOType d = A_diagonal[ i ]; + IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; + x[ i ] = v / d; // } }, color_mask, x, r, smoother_temp, A_diagonal ); @@ -130,6 +131,7 @@ namespace grb { * and no check is performed to ensure these assumptions hold. 
Hence, it is up to user logic * to pass correct coloring information. Otherwise, \b no guarantees hold on the result. * + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators zero-values @@ -140,6 +142,7 @@ namespace grb { * unsuccessful operation otherwise */ template< + Descriptor descr, typename IOType, typename NonzeroType, class Ring @@ -149,19 +152,27 @@ namespace grb { const Ring & ring ) { RC ret = SUCCESS; + // zero the temp output just once, assuming proper masking avoids + // interference among different colors + ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, + ring. template getZero< IOType >() ); + // forward step using cit_t = typename std::vector< grb::Vector< bool > >::const_iterator; cit_t end = smoothing_info.color_masks.cend(); for( cit_t it = smoothing_info.color_masks.cbegin(); it != end && ret == SUCCESS; ++it ) { - ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, - smoothing_info.smoother_temp, *it, ring ); + ret = rbgs_single_step< descr >( data.A, smoothing_info.A_diagonal, data.r, + data.z, smoothing_info.smoother_temp, *it, ring ); } + ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, + ring. 
template getZero< IOType >() ); + // backward step using crit_t = typename std::vector< grb::Vector< bool > >::const_reverse_iterator; crit_t rend = smoothing_info.color_masks.crend(); for( crit_t rit = smoothing_info.color_masks.crbegin(); rit != rend && ret == SUCCESS; ++rit ) { - ret = rbgs_single_step( data.A, smoothing_info.A_diagonal, data.r, data.z, - smoothing_info.smoother_temp, *rit, ring ); + ret = rbgs_single_step< descr >( data.A, smoothing_info.A_diagonal, data.r, + data.z, smoothing_info.smoother_temp, *rit, ring ); } return ret; } @@ -177,11 +188,13 @@ namespace grb { * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators + * @tparam descr descriptors with statically-known data for computation and containers */ template < typename IOType, typename NonzeroType, - class Ring + class Ring, + Descriptor descr = descriptors::no_operation > struct RedBlackGSSmootherRunner { size_t presmoother_steps; ///< number of pre-smoother steps @@ -224,7 +237,8 @@ namespace grb { SmootherData< IOType > &smoothing_info = *( levels.at( data.level ).get() ); for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { - ret = ret ? ret : internal::red_black_gauss_seidel( data, smoothing_info, ring ); + ret = ret ? ret : internal::red_black_gauss_seidel< descr >( + data, smoothing_info, ring ); assert( ret == SUCCESS ); } return ret; diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index e1ef7db73..f2b008e6f 100644 --- a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -69,6 +69,7 @@ namespace grb { * * The coarsening information are stored inside \p CoarseningData. 
* + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators zero-values @@ -82,6 +83,7 @@ namespace grb { * unsuccessful operation otherwise */ template< + Descriptor descr, typename IOType, typename NonzeroType, class Ring, @@ -94,16 +96,14 @@ namespace grb { const Minus & minus ) { RC ret = SUCCESS; - // DBG_print_norm( coarsening_data.Ax_finer, "+++ Ax_finer prima" ); - ret = ret ? ret : grb::eWiseApply( coarsening_data.Ax_finer, r_fine, + ret = ret ? ret : grb::eWiseApply< descr >( coarsening_data.Ax_finer, r_fine, coarsening_data.Ax_finer, minus ); // Ax_finer = r_fine - Ax_finer - // DBG_print_norm( coarsening_data.Ax_finer, "+++ Ax_finer dopo" ); assert( ret == SUCCESS ); // actual coarsening, from ncols(*coarsening_data->A) == *coarsening_data->system_size * 8 // to *coarsening_data->system_size - ret = ret ? ret : grb::set( r_coarse, ring.template getZero< IOType >() ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( r_coarse, coarsening_data.coarsening_matrix, + ret = ret ? ret : grb::set< descr >( r_coarse, ring.template getZero< IOType >() ); + ret = ret ? ret : grb::mxv< descr >( r_coarse, coarsening_data.coarsening_matrix, coarsening_data.Ax_finer, ring ); // r = coarsening_matrix * Ax_finer return ret; } @@ -114,6 +114,7 @@ namespace grb { * * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
* + * @tparam descr descriptor for static information * @tparam IOType type of result and intermediate vectors used during computation * @tparam NonzeroType type of matrix values * @tparam Ring the ring of algebraic operators zero-values @@ -125,6 +126,7 @@ namespace grb { * unsuccessful operation otherwise */ template< + Descriptor descr, typename IOType, typename NonzeroType, class Ring @@ -137,13 +139,13 @@ namespace grb { RC ret = SUCCESS; // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 // to nrows(x_fine) - ret = ret ? ret : set( coarsening_data.Ax_finer, 0 ); + ret = ret ? ret : grb::set< descr >( coarsening_data.Ax_finer, ring.template getZero< IOType >() ); - ret = ret ? ret : grb::mxv< grb::descriptors::transpose_matrix | grb::descriptors::dense >( + ret = ret ? ret : grb::mxv< descr | grb::descriptors::transpose_matrix >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, z_coarse, ring ); assert( ret == SUCCESS ); - ret = ret ? ret : grb::foldl( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; + ret = ret ? ret : grb::foldl< descr >( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; assert( ret == SUCCESS ); return ret; } @@ -160,7 +162,8 @@ namespace grb { typename IOType, typename NonzeroType, class Ring, - class Minus + class Minus, + Descriptor descr = descriptors::no_operation > struct SingleMatrixCoarsener { static_assert( std::is_default_constructible< Ring >::value, @@ -189,10 +192,10 @@ namespace grb { ) { // first compute the residual CoarseningData< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; - grb::RC ret = grb::set( coarsener.Ax_finer, ring. template getZero< IOType >() ); - ret = ret ? ret : grb::mxv< grb::descriptors::dense >( coarsener.Ax_finer, finer.A, finer.z, ring ); + grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring. template getZero< IOType >() ); + ret = ret ? 
ret : grb::mxv< descr >( coarsener.Ax_finer, finer.A, finer.z, ring ); - return internal::compute_coarsening( finer.r, coarser.r, coarsener, ring, minus ); + return internal::compute_coarsening< descr >( finer.r, coarser.r, coarsener, ring, minus ); } /** @@ -203,7 +206,7 @@ namespace grb { const MultiGridInputType &coarser, MultiGridInputType &finer ) { - return internal::compute_prolongation( coarser.z, finer.z, *coarsener_levels[ finer.level ], ring ); + return internal::compute_prolongation< descr >( coarser.z, finer.z, *coarsener_levels[ finer.level ], ring ); } }; diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index bba6e9c2f..f7cd05787 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -97,9 +97,10 @@ using StdRing = Semiring< grb::operators::add< NonzeroType >, grb::operators::mu grb::identities::zero, grb::identities::one >; using StdMinus = operators::subtract< NonzeroType >; using coord_t = size_t; +constexpr Descriptor hpcg_desc = descriptors::dense; // assembled types for simulation runners and input/output structures -using hpcg_runner_t = HPCGRunnerType< IOType, NonzeroType, InputType, ResidualType, +using hpcg_runner_t = HPCGRunnerType< hpcg_desc, IOType, NonzeroType, InputType, ResidualType, StdRing, StdMinus >; using mg_data_t = MultiGridData< IOType, NonzeroType >; using coarsening_data_t = CoarseningData< IOType, NonzeroType >; @@ -338,7 +339,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { std::unique_ptr< hpcg_data_t > hpcg_state; // define the main HPCG runner and initialize the options of its components - hpcg_runner_t hpcg_runner( build_hpcg_runner< IOType, NonzeroType, InputType, ResidualType, + hpcg_runner_t hpcg_runner( build_hpcg_runner< hpcg_desc, IOType, NonzeroType, InputType, ResidualType, StdRing, StdMinus >( in.smoother_steps ) ); auto &mg_runner = hpcg_runner.mg_runner; auto &coarsener = mg_runner.coarsener_runner; From 571fdd5391880fe522146eecb5b8753b9a0c2067 Mon Sep 17 00:00:00 
2001 From: Alberto Scolari Date: Wed, 22 Feb 2023 21:02:36 +0100 Subject: [PATCH 19/28] adding telemetry functionalities: * token to enable/disable all functionalities at compile time/run time * stopwatch to measure elapsed time * output stream to selectively log information (depending on token) * CSV writer object to emit tracing info in a convenient format --- .../utils/iterators/IteratorValueAdaptor.hpp | 8 +- .../graphblas/utils/telemetry/CSVWriter.hpp | 293 ++++++++++++++++++ .../utils/telemetry/OutputStream.hpp | 152 +++++++++ .../graphblas/utils/telemetry/Stopwatch.hpp | 143 +++++++++ .../graphblas/utils/telemetry/Telemetry.hpp | 32 ++ .../utils/telemetry/TelemetryBase.hpp | 98 ++++++ .../utils/telemetry/TelemetryToken.hpp | 145 +++++++++ .../graphblas/utils/telemetry/Timeable.hpp | 101 ++++++ 8 files changed, 968 insertions(+), 4 deletions(-) create mode 100644 include/graphblas/utils/telemetry/CSVWriter.hpp create mode 100644 include/graphblas/utils/telemetry/OutputStream.hpp create mode 100644 include/graphblas/utils/telemetry/Stopwatch.hpp create mode 100644 include/graphblas/utils/telemetry/Telemetry.hpp create mode 100644 include/graphblas/utils/telemetry/TelemetryBase.hpp create mode 100644 include/graphblas/utils/telemetry/TelemetryToken.hpp create mode 100644 include/graphblas/utils/telemetry/Timeable.hpp diff --git a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp index bca870af8..2c0383325 100644 --- a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp +++ b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp @@ -56,8 +56,8 @@ namespace grb { static_assert( std::is_copy_assignable< AdaptorType >::value, "AdaptorType must be copy-assignable" ); - typedef decltype( std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) ) reference; - typedef typename std::decay< reference >::type value_type; + typedef typename std::decay< decltype( *std::declval< 
AdaptorType >()( *std::declval< InnerIterType >() ) ) >::type value_type; + typedef value_type & reference; typedef value_type * pointer; typedef const value_type * const_pointer; typedef typename std::iterator_traits< InnerIterType >::iterator_category iterator_category; @@ -129,9 +129,9 @@ namespace grb { bool operator==( const SelfType & o ) const { return ! operator!=( o ); } - reference operator*() { return adaptor( *iter ); } + reference operator*() { return *adaptor( *iter ); } - const reference operator*() const { return adaptor( *iter ); } + const reference operator*() const { return *adaptor( *iter ); } pointer operator->() { return adaptor( *iter ); } diff --git a/include/graphblas/utils/telemetry/CSVWriter.hpp b/include/graphblas/utils/telemetry/CSVWriter.hpp new file mode 100644 index 000000000..969b73be8 --- /dev/null +++ b/include/graphblas/utils/telemetry/CSVWriter.hpp @@ -0,0 +1,293 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_CSV_WRITER +#define _H_GRB_UTILS_TELEMETRY_CSV_WRITER + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "TelemetryBase.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + static constexpr char STD_CSV_SEP = ','; + + template< + typename TelTokenType, + bool enabled, + class T1, + class ...Ts + > class CSVWriter : public TelemetryBase< TelTokenType, enabled > { + public: + template< class U, class ...Us > struct is_csv_printable { + static constexpr bool value = std::is_arithmetic< U >::value; + }; + + template< class U1, class U2, class ...Us > struct is_csv_printable< U1, U2, Us...> { + static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... >::value; + }; + + static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); + + using self_t = CSVWriter< TelTokenType, enabled, T1, Ts... >; + + using base_t = TelemetryBase< TelTokenType, enabled >; + + CSVWriter() = delete; + + CSVWriter( + const TelTokenType & tt, + std::initializer_list< const char * > _headers, + char _separator, + size_t size + ) : + base_t( tt ) + { + ( void ) tt; + ( void ) _headers; + ( void ) _separator; + ( void ) size; + } + + CSVWriter( const TelTokenType & tt, std::initializer_list< const char * > _headers ) : + CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) + {} + + CSVWriter( const self_t & ) = delete; + + CSVWriter( self_t && ) = delete; + + self_t & operator=( const self_t & ) = delete; + + self_t & operator=( self_t && ) = delete; + + template< class... UTypes > void add_line( UTypes&&... 
) { + } + + void clear() {} + + std::ostream & write_last_line_to_stream( std::ostream & stream ) const { + return stream; + } + + // print nothing + char last_line() const { return '\0'; } + + std::ostream & write_to_stream( std::ostream & stream ) const { + return stream; + } + + void write_to_file( const char * name ) const { + ( void ) name; + } + }; + + + template< + typename TelTokenType, + class T1, + class ...Ts + > class CSVWriter< TelTokenType, true, T1, Ts... > : public TelemetryBase< TelTokenType, true > { + public: + template< class U, class ...Us > struct is_csv_printable { + static constexpr bool value = std::is_arithmetic< U >::value; + }; + + template< class U1, class U2, class ...Us > struct is_csv_printable< U1, U2, Us...> { + static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... >::value; + }; + + static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); + + using self_t = CSVWriter< TelTokenType, true, T1, Ts... 
>; + + using base_t = TelemetryBase< TelTokenType, true >; + + class CSVLastTuple { + public: + CSVLastTuple( const self_t & _csv ) : csv( _csv ) {} + + CSVLastTuple( const CSVLastTuple & clt ) : csv( clt.csv ) {} + + inline friend std::ostream & operator<<( + std::ostream & stream, + const CSVLastTuple & t + ) { + return t.csv.write_last_line_to_stream( stream ); + } + + private: + const self_t & csv; + }; + + CSVWriter() = delete; + + CSVWriter( + const TelTokenType & tt, + std::initializer_list< const char * > _headers, + char _separator, + size_t size + ) : + base_t( tt ), + separator( _separator ) + { + if( _headers.size() != NUM_FIELDS ) { + throw std::runtime_error( "wrong number of headers, it must match the unmber of line elements" ); + } + // emplace anyway, so that the object is always in a consistent state and can be + // activated/deactivated at runtime + for( const auto & h : _headers ) { + headers.emplace_back( h ); + } + if ( !tt.is_active() ) { + return; + } + lines.reserve( size ); + // zero to force physical allocation + //std::memset( reinterpret_cast< void * >( lines.data() ), 0, lines.size() * sizeof( tuple_t ) ); + } + + CSVWriter( const TelTokenType & tt, std::initializer_list< const char * > _headers ) : + CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) + {} + + CSVWriter( const self_t & ) = delete; + + CSVWriter( self_t && ) = delete; + + self_t & operator=( const self_t & ) = delete; + + self_t & operator=( self_t && ) = delete; + + template< class... UTypes > void add_line( UTypes&&... vs ) { + if ( this->is_active() ) { + lines.emplace_back( std::forward< UTypes >( vs )...
); + } + } + + void clear() { + lines.clear(); + } + + std::ostream & write_last_line_to_stream( std::ostream & stream ) const { + if ( lines.size() > 0 && this->is_active() ) { + write_line( stream, lines.back() ); + } + return stream; + } + + CSVLastTuple last_line() const { + if ( lines.size() == 0 ) { + throw std::runtime_error( "no measures" ); + } + return CSVLastTuple( *this ); + } + + std::ostream & write_to_stream( std::ostream & stream ) const { + if ( !this->is_active() ) { + return stream; + } + write_header( stream ); + stream << NEW_LINE; + for( const tuple_t & line : lines ) { + write_line( stream, line ); + stream << NEW_LINE; + } + return stream; + } + + void write_to_file( const char * name ) const { + if ( !this->is_active() ) { + return; + } + std::ofstream file( name ); + if( !file.is_open() ) { + throw std::runtime_error( "cannot open file" ); + } + write_to_stream( file ); + file.close(); + } + + private: + static constexpr char NEW_LINE = '\n'; + + static constexpr size_t NUM_FIELDS = sizeof...( Ts ) + 1; + + using tuple_t = std::tuple< T1, Ts... 
>; + + std::vector< std::string > headers; + const char separator; + std::vector< tuple_t > lines; + + std::ostream & write_header( std::ostream & stream ) const { + stream << headers[ 0 ]; + for( size_t i = 1; i < headers.size(); i++ ) { + stream << separator << headers[ i ]; + } + return stream; + } + + void write_line( std::ostream & stream, const tuple_t & line ) const { + write_val< 0 >( stream, line ); + } + + // recursive case + template< size_t OFFS > inline void write_val( + std::ostream & stream, + typename std::enable_if< OFFS < NUM_FIELDS - 1, const tuple_t &>::type _tup + ) const { + stream << std::get< OFFS >( _tup ) << separator; + write_val< OFFS + 1 >( stream, _tup ); // tail recursion + } + + // base case + template< size_t OFFS > inline void write_val( + std::ostream & stream, + typename std::enable_if< OFFS == NUM_FIELDS - 1, const tuple_t &>::type _tup + ) const { + (void) separator; + stream << std::get< OFFS >( _tup ); + } + + }; + + template< + class T1, + class ...Ts + > using StaticCSVWriter = CSVWriter< TelemetryTokenAlwaysOn, true, T1, Ts... >; + + } + } +} + + +#endif // _H_GRB_UTILS_TELEMETRY_CSV_WRITER diff --git a/include/graphblas/utils/telemetry/OutputStream.hpp b/include/graphblas/utils/telemetry/OutputStream.hpp new file mode 100644 index 000000000..35622b11a --- /dev/null +++ b/include/graphblas/utils/telemetry/OutputStream.hpp @@ -0,0 +1,152 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM +#define _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM + +#include +#include +#include +#include + +#include "TelemetryBase.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + template< typename T > struct is_ostream_input { + + template< typename U > static constexpr bool is_input( + typename std::enable_if< std::is_same< + // this means that the expression std::cout << obj is valid, where obj is of type T + decltype( std::declval< std::ostream& >() << std::declval< U >() ), + std::ostream& >::value, nullptr_t >::type + ) { + return true; + } + + template< typename U > static constexpr bool is_input( ... ) { + return false; + } + + static constexpr bool value = is_input< T >( nullptr ); + }; + + class OutputStreamLazy { + constexpr char operator()() const { return '\0'; } + }; + + template< + typename TelTokenType, + bool enabled = TelTokenType::enabled + > class OutputStream : public TelemetryBase< TelTokenType, enabled > { + public: + using self_t = OutputStream< TelTokenType, enabled >; + + OutputStream() = default; + + OutputStream( const TelTokenType & _tt, std::ostream & _out ) : + TelemetryBase< TelTokenType, enabled >( _tt ) + { + ( void ) _out; + } + + OutputStream( const self_t & _out ) = default; + + OutputStream & operator=( const self_t & _out ) = delete; + + template< typename T > inline typename std::enable_if< + is_ostream_input< T >::value, + self_t & >::type operator<<( T&& v ) { + ( void ) v; + return *this; + } + + inline self_t & operator<<( std::ostream& (*func)( std::ostream& ) ) { + ( void ) func; + return *this; + } + + template< class F > inline typename std::enable_if< + is_ostream_input< decltype( std::declval< F >()() ) >::value + && std::is_base_of< OutputStreamLazy, F >::value, + self_t & >::type 
operator<<( F&& fun ) { + ( void ) fun; + return *this; + } + }; + + template< typename TelTokenType > class OutputStream< TelTokenType, true > : + public TelemetryBase< TelTokenType, true > { + public: + using self_t = OutputStream< TelTokenType, true >; + + using base_t = TelemetryBase< TelTokenType, true >; + + OutputStream( const TelTokenType & _tt, std::ostream & _out ) : + TelemetryBase< TelTokenType, true >( _tt ), + out( _out ) + {} + + OutputStream( const self_t & _outs ) = default; + + OutputStream & operator=( const self_t & _out ) = delete; + + template< typename T > inline typename std::enable_if< + is_ostream_input< T >::value, + self_t & >::type operator<<( T&& v ) { + if ( this->is_active() ) { + out << std::forward< T >( v ); + } + return *this; + } + + inline self_t & operator<<( std::ostream& (*func)( std::ostream& ) ) { + if ( this->is_active() ) { + out << func; + } + return *this; + } + + template< class F > inline typename std::enable_if< + is_ostream_input< decltype( std::declval< F >()() ) >::value + && std::is_base_of< OutputStreamLazy, F >::value, + self_t & >::type operator<<( F&& fun ) { + if ( this->is_active() ) { + out << fun(); + } + return *this; + } + + private: + std::ostream & out; + }; + + using OutputStreamOff = OutputStream< TelemetryTokenAlwaysOff, false >; + + using OutputStreamOn = OutputStream< TelemetryTokenAlwaysOn, true >; + } + } +} + +#endif // _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM diff --git a/include/graphblas/utils/telemetry/Stopwatch.hpp b/include/graphblas/utils/telemetry/Stopwatch.hpp new file mode 100644 index 000000000..2cc900b61 --- /dev/null +++ b/include/graphblas/utils/telemetry/Stopwatch.hpp @@ -0,0 +1,143 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_STOPWATCH +#define _H_GRB_UTILS_TELEMETRY_STOPWATCH + +#include <chrono> + +#include "TelemetryBase.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + using duration_nano_t = size_t; + + using duration_float_t = double; + + class StopwatchBase { + public: + static inline duration_float_t nano2Micro( duration_nano_t nano ) { + return static_cast< duration_float_t >( nano ) / 1000UL; + } + + static inline duration_float_t nano2Milli( duration_nano_t nano ) { + return static_cast< duration_float_t >( nano ) / 1000000UL; + } + + static inline duration_float_t nano2Sec( duration_nano_t nano ) { + return static_cast< duration_float_t >( nano ) / 1000000000UL; + } + + }; + + template< + typename TelTokenType, + bool enabled = TelTokenType::enabled + > class Stopwatch: + public StopwatchBase, public TelemetryBase< TelTokenType, enabled > { + public: + Stopwatch( const TelTokenType & tt ) : + StopwatchBase(), + TelemetryBase< TelTokenType, enabled >( tt ) + {} + + Stopwatch( const Stopwatch & ) = default; + + constexpr inline void start() {} + + constexpr inline duration_nano_t stop() { + return static_cast< duration_nano_t >( 0 ); + } + + constexpr inline duration_nano_t reset() { + return static_cast< duration_nano_t >( 0 ); + } + + constexpr inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( 0 ); + } + }; + + + template< + typename TelTokenType + > class Stopwatch< TelTokenType, true >: + public
StopwatchBase, public TelemetryBase< TelTokenType, true > { + + typedef typename std::chrono::high_resolution_clock clock_t; + + typedef typename std::chrono::nanoseconds duration_t; + + typedef typename std::chrono::high_resolution_clock::time_point time_point_t; + + duration_t elapsedTime; + + time_point_t beginning; + + public: + Stopwatch( const TelTokenType & tt ) : + StopwatchBase(), + TelemetryBase< TelTokenType, true >( tt ), + elapsedTime( duration_t::zero() ) + {} + + Stopwatch( const Stopwatch & s ) = default; + + inline void start() { + if ( this->is_active() ) { + beginning = clock_t::now(); + } + } + + inline duration_nano_t stop() { + duration_nano_t count = 0; + if ( this->is_active() ) { + time_point_t end = clock_t::now(); + duration_t d = end - beginning; + count = d.count(); + elapsedTime += d; + } + return count; + } + + inline duration_nano_t reset() { + duration_t r = duration_t::zero(); + if ( this->is_active() ) { + r = elapsedTime; + elapsedTime = duration_t::zero(); + } + return static_cast< duration_nano_t >( r.count() ); + } + + inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( elapsedTime.count() ); + } + }; + + using StaticStopwatch = Stopwatch< TelemetryTokenAlwaysOn, true >; + } + } +} + +#endif // _H_GRB_UTILS_TELEMETRY_STOPWATCH diff --git a/include/graphblas/utils/telemetry/Telemetry.hpp b/include/graphblas/utils/telemetry/Telemetry.hpp new file mode 100644 index 000000000..f8369d1d1 --- /dev/null +++ b/include/graphblas/utils/telemetry/Telemetry.hpp @@ -0,0 +1,32 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY + +#include "TelemetryToken.hpp" +#include "Stopwatch.hpp" +#include "Timeable.hpp" +#include "CSVWriter.hpp" +#include "OutputStream.hpp" + +#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY diff --git a/include/graphblas/utils/telemetry/TelemetryBase.hpp b/include/graphblas/utils/telemetry/TelemetryBase.hpp new file mode 100644 index 000000000..969f93213 --- /dev/null +++ b/include/graphblas/utils/telemetry/TelemetryBase.hpp @@ -0,0 +1,98 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE + +#include "TelemetryToken.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + template< + typename TelTokenType, + bool enabled = TelTokenType::enabled + > class TelemetryBase { + public: + static_assert( is_telemetry_token< TelTokenType >::value, + "type TelTokenType does not implement Telemetry Token interface" ); + + using self_t = TelemetryBase< TelTokenType, enabled >; + + TelemetryBase() = default; + + TelemetryBase( const TelTokenType & tt ) { + ( void ) tt; + } + + TelemetryBase( const self_t & ) = default; + + self_t & operator=( const self_t & ) = delete; + + constexpr bool is_active() const { return false; } + }; + + + template< + typename TelTokenType + > class TelemetryBase< TelTokenType, true > { + + const TelTokenType & telemetry_token; + + public: + static_assert( is_telemetry_token< TelTokenType >::value, + "type TelTokenType does not implement Telemetry Token interface" ); + + using self_t = TelemetryBase< TelTokenType, true >; + + TelemetryBase( const TelTokenType & tt ): telemetry_token( tt ) {} + + TelemetryBase( const self_t & tb ) : telemetry_token( tb.telemetry_token ) {} + + self_t & operator=( const self_t & ) = delete; + + bool is_active() const { return telemetry_token.is_active(); } + }; + + // always active base, especially for prototyping scenarios + template<> class TelemetryBase< TelemetryTokenAlwaysOn, true > { + public: + static_assert( is_telemetry_token< TelemetryTokenAlwaysOn >::value, + "type TelTokenType does not implement Telemetry Token interface" ); + + using self_t = TelemetryBase< TelemetryTokenAlwaysOn, true >; + + TelemetryBase( const TelemetryTokenAlwaysOn & tt ) { (void) tt; } + + TelemetryBase( const self_t & tb ) = default; + + self_t & operator=( const self_t & ) = delete; + + constexpr bool is_active() const { return true; }
+ }; + + } + } +} + +#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE diff --git a/include/graphblas/utils/telemetry/TelemetryToken.hpp b/include/graphblas/utils/telemetry/TelemetryToken.hpp new file mode 100644 index 000000000..dabac3c2e --- /dev/null +++ b/include/graphblas/utils/telemetry/TelemetryToken.hpp @@ -0,0 +1,145 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN + +#include <type_traits> +#include <utility> + +namespace grb { + namespace utils { + namespace telemetry { + + template< typename T > constexpr bool is_token_enabled() { return false; } + + // OFF + template< bool en > class TelemetryTokenBase { + public: + using self_t = TelemetryTokenBase< en >; + + TelemetryTokenBase( bool _enabled ) { + (void) _enabled; + } + + TelemetryTokenBase() = delete; + + TelemetryTokenBase( const self_t & ) = delete; + + TelemetryTokenBase& operator=( const self_t & ) = delete; + + constexpr bool is_active() const { return false; } + + static constexpr bool enabled = false; + }; + + using TelemetryTokenAlwaysOff = TelemetryTokenBase< false >; + + template<> class TelemetryTokenBase< true > { + public: + using self_t = TelemetryTokenBase< true >; + + TelemetryTokenBase( bool _active ) : active( _active ) {} + + TelemetryTokenBase() = delete; + +
TelemetryTokenBase( const self_t & ) = delete; + + TelemetryTokenBase& operator=( const self_t & ) = delete; + + bool is_active() const { return this->active; } + + static constexpr bool enabled = true; + + protected: + const bool active; + }; + + // always active token, especially for prototyping scenarios + class TelemetryTokenAlwaysOn { + public: + TelemetryTokenAlwaysOn( bool _enabled ) { + (void) _enabled; + } + + TelemetryTokenAlwaysOn() = delete; + + TelemetryTokenAlwaysOn( const TelemetryTokenAlwaysOn & ) = delete; + + TelemetryTokenAlwaysOn& operator=( const TelemetryTokenAlwaysOn & ) = delete; + + constexpr bool is_active() const { return true; } + + static constexpr bool enabled = true; + }; + + + template< typename T > struct is_telemetry_token { + private: + template< typename U > static constexpr bool has_enabled_field( + typename std::enable_if< + std::is_same< typename std::decay< decltype( U::enabled ) >::type, bool >::value, + bool * >::type + ) { + return true; + } + + template< typename U > static constexpr bool has_enabled_field( ... ) { return false; } + + template< typename U > static constexpr bool has_is_active_method( + typename std::enable_if< + std::is_same< typename std::decay< decltype( std::declval< U >().is_active() ) >::type, bool >::value, + bool * >::type + ) { + return true; + } + + template< typename U > static constexpr bool has_is_active_method( ... 
) { return false; } + + public: + static constexpr bool value = has_enabled_field< T >( nullptr ) && has_is_active_method< T >( nullptr ); + }; + } + + } +} + +#define __TELEMETRY_TOKEN_ENABLER_NAME( name ) __ ## name ## Enabler +#define __TELEMETRY_TOKEN_NAME( name ) name + +#define DECLARE_TELEMETRY_TOKEN( name ) \ + class __TELEMETRY_TOKEN_ENABLER_NAME( name ) {}; \ + template< typename T > class __TELEMETRY_TOKEN_NAME( name ) : \ + public grb::utils::telemetry::TelemetryTokenBase< grb::utils::telemetry::is_token_enabled< T >() > { \ + public: \ + using base_t = grb::utils::telemetry::TelemetryTokenBase< grb::utils::telemetry::is_token_enabled< T >() >; \ + __TELEMETRY_TOKEN_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \ + }; + + +#define ACTIVATE_TOKEN( name ) namespace grb { namespace utils { namespace telemetry { \ + template<> constexpr bool is_token_enabled< __TELEMETRY_TOKEN_ENABLER_NAME( name ) >() { return true; } \ +} } } + +#define TELEMETRY_TOKEN_TYPE( name ) __TELEMETRY_TOKEN_NAME( name )< __TELEMETRY_TOKEN_ENABLER_NAME( name ) > + +#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN diff --git a/include/graphblas/utils/telemetry/Timeable.hpp b/include/graphblas/utils/telemetry/Timeable.hpp new file mode 100644 index 000000000..02dd85b9e --- /dev/null +++ b/include/graphblas/utils/telemetry/Timeable.hpp @@ -0,0 +1,101 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * @author Alberto Scolari + * @date 14th February, 2023 + */ + +#ifndef _H_GRB_UTILS_TIMEABLE +#define _H_GRB_UTILS_TIMEABLE + +#include "Stopwatch.hpp" + +namespace grb { + namespace utils { + namespace telemetry { + + template< + typename TelTokenType, + bool enabled = TelTokenType::enabled + > class Timeable { + public: + using self_t = Timeable< TelTokenType, enabled >; + + Timeable( const TelTokenType & tt ) { + (void) tt; + } + + Timeable( const self_t & ) = default; + + Timeable& operator=( const self_t & ) = delete; + + constexpr inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( 0 ); + } + + constexpr inline duration_nano_t reset() { + return static_cast< duration_nano_t >( 0 ); + } + + protected: + inline void start() {} + + constexpr inline duration_nano_t stop() { + return static_cast< duration_nano_t >( 0 ); + } + + }; + + template< typename TelTokenType > class Timeable< TelTokenType, true > { + public: + using self_t = Timeable< TelTokenType, true >; + + Timeable( const TelTokenType & tt ) : swatch( tt ) {} + + Timeable( const self_t & ) = default; + + Timeable& operator=( const self_t & ) = delete; + + inline duration_nano_t getElapsedNano() const { + return swatch.getElapsedNano(); + } + + inline duration_nano_t reset() { + return swatch.reset(); + } + + protected: + inline void start() { + swatch.start(); + } + + inline duration_nano_t stop() { + return swatch.stop(); + } + + private: + Stopwatch< TelTokenType > swatch; + }; + + using StaticTimeable = Timeable< TelemetryTokenAlwaysOn, true >; + + } + } +} + +#endif // _H_GRB_UTILS_TIMEABLE From 7833b319752498c7d197815cc88c8dad345fe33c Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 23 Feb 2023 15:46:51 +0100 Subject: [PATCH 20/28] restructuring Multigrid and HPCG to clean code and add flexible logging fixing MG telemetry - missing reset --- include/graphblas/algorithms/hpcg/hpcg.hpp | 94 ---- .../algorithms/hpcg/system_builder.hpp | 6 + 
.../algorithms/hpcg/system_building_utils.hpp | 37 +- .../multigrid/multigrid_building_utils.hpp | 18 +- .../algorithms/multigrid/multigrid_cg.hpp | 405 +++++++++--------- .../algorithms/multigrid/multigrid_data.hpp | 14 +- .../multigrid/multigrid_v_cycle.hpp | 292 ++++++------- .../multigrid/red_black_gauss_seidel.hpp | 223 +++++----- .../multigrid/single_matrix_coarsener.hpp | 166 +++---- tests/smoke/hpcg.cpp | 354 +++++++++------ 10 files changed, 761 insertions(+), 848 deletions(-) delete mode 100644 include/graphblas/algorithms/hpcg/hpcg.hpp diff --git a/include/graphblas/algorithms/hpcg/hpcg.hpp b/include/graphblas/algorithms/hpcg/hpcg.hpp deleted file mode 100644 index b4884f4e1..000000000 --- a/include/graphblas/algorithms/hpcg/hpcg.hpp +++ /dev/null @@ -1,94 +0,0 @@ - -/* - * Copyright 2022 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @dir include/graphblas/algorithms/hpcg - * This folder contains the code specific to the HPCG benchmark implementation: generation of the physical system, - * generation of the single point coarsener and coloring algorithm. - */ - -/** - * @file hpcg.hpp - * @author Alberto Scolari (alberto.scolari@huawei.com) - * Utility to build a full HPCG runner, bringing together all needed data structures. 
- */ - -#ifndef _H_GRB_ALGORITHMS_HPCG_HPCG -#define _H_GRB_ALGORITHMS_HPCG_HPCG - -#include - -#include -#include -#include -#include - -namespace grb { - namespace algorithms { - - // simply "assemble" types - template< - Descriptor descr, - typename IOType, - typename ResidualType, - typename NonzeroType, - typename InputType, - class Ring, - class Minus - > using HPCGRunnerType = MultiGridCGRunner< IOType, NonzeroType, InputType, ResidualType, - MultiGridRunner< - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr >, - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr >, - IOType, NonzeroType, Ring, Minus, descr - >, Ring, Minus, descr - >; - - /** - * Builds a full HPCG runner object by "assemblying" all needed information, - * with default type for smoother, coarsener and multi-grid runner. - * - * @param[in] smoother_steps how many times the smoother should run (both pre- and post-smoothing) - */ - template< - Descriptor descr, - typename IOType, - typename ResidualType, - typename NonzeroType, - typename InputType, - class Ring, - class Minus - > HPCGRunnerType< descr, IOType, ResidualType, NonzeroType, InputType, Ring, Minus > - build_hpcg_runner( size_t smoother_steps ) { - - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr > coarsener; - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr > - smoother( { smoother_steps, smoother_steps, 1UL, {}, Ring() } ); - - MultiGridRunner< - RedBlackGSSmootherRunner< IOType, NonzeroType, Ring, descr >, - SingleMatrixCoarsener< IOType, NonzeroType, Ring, Minus, descr >, - IOType, NonzeroType, Ring, Minus, descr - > mg_runner( std::move( smoother ), std::move( coarsener ) ); - - return HPCGRunnerType< descr, IOType, ResidualType, NonzeroType, InputType, Ring, Minus >( - std::move( mg_runner ) ); - } - - } // namespace algorithms -} // namespace grb - -#endif // _H_GRB_ALGORITHMS_HPCG_HPCG diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp 
b/include/graphblas/algorithms/hpcg/system_builder.hpp index e19ba208d..94d1565f2 100644 --- a/include/graphblas/algorithms/hpcg/system_builder.hpp +++ b/include/graphblas/algorithms/hpcg/system_builder.hpp @@ -15,6 +15,12 @@ * limitations under the License. */ +/** + * @dir include/graphblas/algorithms/hpcg + * This folder contains the code specific to the HPCG benchmark implementation: generation of the physical system, + * generation of the single point coarsener and coloring algorithm. + */ + /** * @file system_builders.hpp * @author Alberto Scolari (alberto.scolari@huawei.com) diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 9503f77ff..b86564def 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -141,16 +141,15 @@ namespace grb { template < size_t DIMS, typename CoordType, - typename NonzeroType + typename NonzeroType, + typename Logger > grb::RC hpcg_populate_system_matrix( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - grb::Matrix< NonzeroType > &M + grb::Matrix< NonzeroType > &M, + Logger & logger ) { - const size_t pid = spmd<>::pid(); - if( pid == 0) { - std::cout << "- generating system matrix..."; - } + logger << "- generating system matrix..."; typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator begin( system_generator.make_begin_iterator() ); typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator end( @@ -421,24 +420,20 @@ namespace grb { template< size_t DIMS, typename CoordType, - typename NonzeroType + typename NonzeroType, + typename Logger > grb::RC hpcg_populate_smoothing_data( const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - SmootherData< NonzeroType > &smoothing_info + SmootherData< NonzeroType > 
&smoothing_info, + Logger & logger ) { - const size_t pid = spmd<>::pid(); - grb::RC rc = set( smoothing_info.A_diagonal, system_generator.get_diag_value() ); if( rc != grb::SUCCESS ) { - if( pid == 0 ) { - std::cout << "error: " << __LINE__ << std::endl; - } + logger << "error: " << __LINE__ << std::endl; return rc; } - if( pid == 0 ) { - std::cout << "- running coloring heuristics..."; - } + logger << "- running coloring heuristics..."; std::vector< CoordType > colors, color_counters; hpcg_greedy_color_ndim_system( system_generator.get_generator(), colors, color_counters ); std::vector< std::vector< CoordType > > per_color_rows; @@ -446,15 +441,11 @@ namespace grb { colors.clear(); colors.shrink_to_fit(); if( rc != grb::SUCCESS ) { - if( pid == 0 ) { - std::cout << "error: " << __LINE__ << std::endl; - } + logger << "error: " << __LINE__ << std::endl; return rc; } - if( pid == 0 ) { - std::cout <<"- found " << color_counters.size() << " colors," - << " generating color masks..."; - } + logger <<"- found " << color_counters.size() << " colors," + << " generating color masks..."; return internal::hpcg_build_static_color_masks( system_generator.system_size(), per_color_rows, smoothing_info.color_masks ); } diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp index 75d23a7cc..ad09f4c9f 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -21,13 +21,13 @@ * Utilities to allocate data for an entire multi-grid simulation. 
*/ +#ifndef _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS +#define _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS + #include #include #include -#ifndef _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS -#define _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS - namespace grb { namespace algorithms { @@ -66,18 +66,20 @@ namespace grb { template< typename MGInfoType, typename CoarsenerInfoType, - typename SmootherInfoType + typename SmootherInfoType, + typename TelTokenType > void multigrid_allocate_data( - const std::vector< size_t > &mg_sizes, std::vector< std::unique_ptr< MGInfoType > > &system_levels, std::vector< std::unique_ptr< CoarsenerInfoType > > &coarsener_levels, - std::vector< std::unique_ptr< SmootherInfoType > > &smoother_levels + std::vector< std::unique_ptr< SmootherInfoType > > &smoother_levels, + const std::vector< size_t > &mg_sizes, + const TelTokenType & tt ) { if( mg_sizes.size() == 0 ) { throw std::invalid_argument( "at least one size should be available" ); } size_t finer_size = mg_sizes[ 0 ]; - system_levels.emplace_back( new MGInfoType( 0, finer_size ) ); // create main system + system_levels.emplace_back( new MGInfoType( tt, 0, finer_size ) ); // create main system smoother_levels.emplace_back( new SmootherInfoType( finer_size ) ); // create smoother for main for( size_t i = 1; i < mg_sizes.size(); i++ ) { size_t coarser_size = mg_sizes[ i ]; @@ -85,7 +87,7 @@ namespace grb { throw std::invalid_argument( "system sizes not monotonically decreasing" ); } coarsener_levels.emplace_back( new CoarsenerInfoType( finer_size, coarser_size ) ); - system_levels.emplace_back( new MGInfoType( i, coarser_size ) ); + system_levels.emplace_back( new MGInfoType( tt, i, coarser_size ) ); smoother_levels.emplace_back( new SmootherInfoType( coarser_size ) ); finer_size = coarser_size; } diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index c517c8cc4..2bb936a1c 100644 --- 
a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -36,6 +36,8 @@ #include #include +#include +#include #include "multigrid_data.hpp" @@ -82,25 +84,6 @@ namespace grb { } }; - /** - * Container for various options and algebraic abstractions to be passed to a CG simulation with multi-grid. - */ - template < - typename IOType, - typename ResidualType, - class Ring, - class Minus - > struct CGOptions { - bool with_preconditioning; ///< whether preconditioning is enabled - size_t max_iterations; ///< max number of allowed iterations for CG: after that, the solver is halted - ///< and the result achieved so far returned - ResidualType tolerance; ///< ratio between initial residual and current residual that halts the solver - ///< if reached, for the solution is to be considered "good enough" - bool print_iter_residual; ///< whether to print information on the multi-grid and the residual on each iteration - Ring ring; ///< algebraic ring to be used - Minus minus; ///< minus operator to be used - }; - /** * Structure for the output information of a CG run. */ @@ -109,185 +92,6 @@ namespace grb { ResidualType norm_residual; ///< norm of the final residual }; - /** - * Conjugate Gradient algorithm implementation augmented by a Multi-Grid solver, - * inspired to the High Performance Conjugate Gradient benchmark. - * - * This CG solver calls the MG solver at the beginning of each iteration to improve - * the initial solution via the residual (thanks to the smoother) and then proceeds with - * the standard CG iteration. - * - * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning - * the failure code. 
- * - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam ResidualType type of the residual norm - * @tparam NonzeroType type of matrix values - * @tparam InputType type of values of the right-hand side vector b - * @tparam MultiGridrunnerType type for the multi-grid runner object - * @tparam Ring algebraic ring type - * @tparam Minus minus operator - * - * @param cg_data data for the CG solver only - * @param cg_opts options for the CG solver - * @param grid_base base (i.e., finer) level of the multi-grid, with the information of the physical system - * @param MultiGridRunner runner object (functor) to call the multi-grid solver - * @param out_info solver output information - * @return grb::RC SUCCESS in case of succesful run - */ - template< - Descriptor descr, - typename IOType, - typename ResidualType, - typename NonzeroType, - typename InputType, - typename MultiGridrunnerType, - class Ring = Semiring< grb::operators::add< IOType >, grb::operators::mul< IOType >, grb::identities::zero, grb::identities::one >, - class Minus = operators::subtract< IOType > - > grb::RC multigrid_conjugate_gradient( - MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, - const CGOptions< IOType, ResidualType, Ring, Minus > &cg_opts, - MultiGridData< IOType, NonzeroType > &grid_base, - MultiGridrunnerType &multigrid_runner, - CGOutInfo< ResidualType > &out_info - ) { - const grb::Matrix< NonzeroType > &A = grid_base.A; // system matrix - grb::Vector< IOType > &r = grid_base.r; // residual vector - grb::Vector< IOType > &z = grid_base.z; // pre-conditioned residual vector - grb::Vector< IOType > &x = cg_data.x; // initial (and final) solution - const grb::Vector< InputType > &b = cg_data.b; // right-side value - grb::Vector< IOType > &p = cg_data.p; // direction vector - grb::Vector< IOType > &Ap = cg_data.u; // temp vector - grb::RC ret = SUCCESS; - - const IOType io_zero = 
cg_opts.ring.template getZero< IOType >(); - ret = ret ? ret : grb::set( Ap, io_zero ); - ret = ret ? ret : grb::set( r, io_zero ); - ret = ret ? ret : grb::set( p, io_zero ); - - ret = ret ? ret : grb::set( p, x ); - // Ap = A * x - ret = ret ? ret : grb::mxv< descr >( Ap, A, x, cg_opts.ring ); - assert( ret == SUCCESS ); - // r = b - Ap - ret = ret ? ret : grb::eWiseApply< descr >( r, b, Ap, cg_opts.minus ); - assert( ret == SUCCESS ); - - const ResidualType residual_zero = cg_opts.ring.template getZero< ResidualType >(); - ResidualType norm_residual = residual_zero; - // norm_residual = r' * r - ret = ret ? ret : grb::dot< descr >( norm_residual, r, r, cg_opts.ring ); - assert( ret == SUCCESS ); - - // compute sqrt to avoid underflow - norm_residual = std::sqrt( norm_residual ); - - // initial norm of residual - out_info.norm_residual = norm_residual; - const ResidualType norm_residual_initial = norm_residual; - ResidualType old_r_dot_z = residual_zero, r_dot_z = residual_zero, beta = residual_zero; - size_t iter = 0; - -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "start p" ); - DBG_print_norm( Ap, "start Ap" ); - DBG_print_norm( r, "start r" ); -#endif - do { -#ifdef HPCG_PRINT_STEPS - DBG_println( "========= iteration " << iter << " =========" ); -#endif - if( cg_opts.print_iter_residual ) { - std::cout << "iteration " << iter; - } - if( cg_opts.with_preconditioning ) { - ret = ret ? ret : multigrid_runner( grid_base ); - assert( ret == SUCCESS ); - } else { - // z = r - ret = ret ? ret : grb::set( z, r ); - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( z, "initial z" ); -#endif - if( iter == 0 ) { - // p = z - ret = ret ? ret : grb::set< descr >( p, z ); - assert( ret == SUCCESS ); - // r_dot_z = r' * z - ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, cg_opts.ring ); - assert( ret == SUCCESS ); - } else { - old_r_dot_z = r_dot_z; - // r_dot_z = r' * z - r_dot_z = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? 
ret : grb::dot< descr >( r_dot_z, r, z, cg_opts.ring ); - assert( ret == SUCCESS ); - - beta = r_dot_z / old_r_dot_z; - // Ap = 0 - ret = ret ? ret : grb::set< descr >( Ap, io_zero ); - assert( ret == SUCCESS ); - // Ap += beta * p - ret = ret ? ret : grb::eWiseMul< descr >( Ap, beta, p, cg_opts.ring ); - assert( ret == SUCCESS ); - // Ap = Ap + z - ret = ret ? ret : grb::eWiseApply< descr >( Ap, Ap, z, cg_opts.ring.getAdditiveOperator() ); - assert( ret == SUCCESS ); - // p = Ap - std::swap( Ap, p ); - assert( ret == SUCCESS ); - } -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( p, "middle p" ); -#endif - // Ap = A * p - ret = ret ? ret : grb::set< descr >( Ap, io_zero ); - ret = ret ? ret : grb::mxv< descr >( Ap, A, p, cg_opts.ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( Ap, "middle Ap" ); -#endif - // pAp = p' * Ap - ResidualType pAp = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? ret : grb::dot< descr >( pAp, Ap, p, cg_opts.ring ); - assert( ret == SUCCESS ); - - ResidualType alpha = r_dot_z / pAp; - // x += alpha * p - ret = ret ? ret : grb::eWiseMul< descr >( x, alpha, p, cg_opts.ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( x, "end x" ); -#endif - // r += - alpha * Ap - ret = ret ? ret : grb::eWiseMul< descr >( r, -alpha, Ap, cg_opts.ring ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( r, "end r" ); -#endif - // residual = r' * r - norm_residual = cg_opts.ring.template getZero< ResidualType >(); - ret = ret ? 
ret : grb::dot< descr >( norm_residual, r, r, cg_opts.ring ); - assert( ret == SUCCESS ); - - norm_residual = std::sqrt( norm_residual ); - - if( cg_opts.print_iter_residual ) { - std::cout << " residual " << norm_residual << std::endl; - } - - ++iter; - out_info.iterations = iter; - out_info.norm_residual = norm_residual; - } while( iter < cg_opts.max_iterations && - norm_residual / norm_residual_initial > cg_opts.tolerance && ret == SUCCESS ); - - return ret; - } - /** * Runner object incapsulating all information to run a Conjugate Gradient solver * with multi-grid. @@ -307,17 +111,21 @@ namespace grb { * @tparam descr descriptors with statically-known data for computation and containers */ template< - typename IOType, - typename NonzeroType, - typename InputType, - typename ResidualType, + typename MGCGTypes, typename MultiGridRunnerType, - class Ring, - class Minus, - Descriptor descr = descriptors::no_operation - > struct MultiGridCGRunner { - + typename TelTokenType, + Descriptor descr = descriptors::no_operation, + typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff + > struct MultiGridCGRunner : public grb::utils::telemetry::Timeable< TelTokenType > { + + using IOType = typename MGCGTypes::IOType; + using NonzeroType = typename MGCGTypes::NonzeroType; + using InputType = typename MGCGTypes::InputType; + using ResidualType = typename MGCGTypes::ResidualType; + using Ring = typename MGCGTypes::Ring; + using Minus = typename MGCGTypes::Minus; using HPCGInputType = MultiGridCGData< IOType, NonzeroType, InputType >; + using MGRunnerType = MultiGridRunnerType; static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); @@ -326,11 +134,16 @@ namespace grb { static_assert( std::is_move_constructible< MultiGridRunnerType >::value, "cannot construct the Multi-Grid runner by move" ); - // default value: override with your own - CGOptions< IOType, ResidualType, Ring, Minus > cg_opts = { true, 10, - 
Ring(). template getZero< ResidualType >(), false, Ring(), Minus() }; + Ring ring; ///< algebraic ring to be used + Minus minus; ///< minus operator to be used + bool with_preconditioning = true; ///< whether preconditioning is enabled + size_t max_iterations = 10; ///< max number of allowed iterations for CG: after that, the solver is halted + ///< and the result achieved so far returned + ResidualType tolerance = ring. template getZero< ResidualType >(); ///< ratio between initial residual and current residual that halts the solver + ///< if reached, for the solution is to be considered "good enough" - MultiGridRunnerType mg_runner; + MultiGridRunnerType &mg_runner; + DbgOutputStreamType dbg_logger; /** * Construct a new MultiGridCGRunner object by moving the required MG runner. @@ -339,8 +152,25 @@ namespace grb { * as the state of the MG runner is managed automatically with this object. */ MultiGridCGRunner( - MultiGridRunnerType &&_mg_runner - ) : mg_runner( std::move( _mg_runner ) ) {} + const TelTokenType & tt, + MultiGridRunnerType &_mg_runner + ) : + grb::utils::telemetry::Timeable< TelTokenType >( tt ), + mg_runner( _mg_runner ), + dbg_logger() + { + static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); + } + + MultiGridCGRunner( + const TelTokenType & tt, + MultiGridRunnerType & _mg_runner, + DbgOutputStreamType & _dbg_logger + ) : + grb::utils::telemetry::Timeable< TelTokenType >( tt ), + mg_runner( _mg_runner ), + dbg_logger( _dbg_logger ) + {} /** * Functional operator to invoke a full CG-MG computation. 
@@ -355,7 +185,154 @@ namespace grb { MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, CGOutInfo< ResidualType > &out_info ) { - return multigrid_conjugate_gradient< descr >( cg_data, cg_opts, grid_base, mg_runner, out_info ); + this->start(); + grb::RC ret = multigrid_conjugate_gradient( cg_data, grid_base, out_info ); + this->stop(); + return ret; + } + + /** + * Conjugate Gradient algorithm implementation augmented by a Multi-Grid solver, + * inspired to the High Performance Conjugate Gradient benchmark. + * + * This CG solver calls the MG solver at the beginning of each iteration to improve + * the initial solution via the residual (thanks to the smoother) and then proceeds with + * the standard CG iteration. + * + * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning + * the failure code. + * + * + * @param cg_data data for the CG solver only + * @param grid_base base (i.e., finer) level of the multi-grid, with the information of the physical system + * @param out_info solver output information + * @return grb::RC SUCCESS in case of succesful run + */ + grb::RC multigrid_conjugate_gradient( + HPCGInputType &cg_data, + typename MultiGridRunnerType::MultiGridInputType &grid_base, + CGOutInfo< ResidualType > &out_info + ) { + const grb::Matrix< NonzeroType > &A = grid_base.A; // system matrix + grb::Vector< IOType > &r = grid_base.r; // residual vector + grb::Vector< IOType > &z = grid_base.z; // pre-conditioned residual vector + grb::Vector< IOType > &x = cg_data.x; // initial (and final) solution + const grb::Vector< InputType > &b = cg_data.b; // right-side value + grb::Vector< IOType > &p = cg_data.p; // direction vector + grb::Vector< IOType > &Ap = cg_data.u; // temp vector + grb::RC ret = SUCCESS; + + const IOType io_zero = ring.template getZero< IOType >(); + ret = ret ? ret : grb::set( Ap, io_zero ); + ret = ret ? ret : grb::set( r, io_zero ); + ret = ret ? 
ret : grb::set( p, io_zero ); + + ret = ret ? ret : grb::set( p, x ); + // Ap = A * x + ret = ret ? ret : grb::mxv< descr >( Ap, A, x, ring ); + assert( ret == SUCCESS ); + // r = b - Ap + ret = ret ? ret : grb::eWiseApply< descr >( r, b, Ap, minus ); + assert( ret == SUCCESS ); + + const ResidualType residual_zero = ring.template getZero< ResidualType >(); + ResidualType norm_residual = residual_zero; + // norm_residual = r' * r + ret = ret ? ret : grb::dot< descr >( norm_residual, r, r, ring ); + assert( ret == SUCCESS ); + + // compute sqrt to avoid underflow + norm_residual = std::sqrt( norm_residual ); + + // initial norm of residual + out_info.norm_residual = norm_residual; + const ResidualType norm_residual_initial = norm_residual; + ResidualType old_r_dot_z = residual_zero, r_dot_z = residual_zero, beta = residual_zero; + size_t iter = 0; + + dbg_logger << ">>> start p: " << p << std::endl; + dbg_logger << ">>> start Ap: " << Ap << std::endl; + dbg_logger << ">>> start r: " << r << std::endl; + + do { + dbg_logger << "========= iteration " << iter << " =========" << std::endl; + + if( with_preconditioning ) { + ret = ret ? ret : mg_runner( grid_base ); + assert( ret == SUCCESS ); + } else { + // z = r + ret = ret ? ret : grb::set( z, r ); + assert( ret == SUCCESS ); + } + dbg_logger << ">>> initial z: " << z << std::endl; + + if( iter == 0 ) { + // p = z + ret = ret ? ret : grb::set< descr >( p, z ); + assert( ret == SUCCESS ); + // r_dot_z = r' * z + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, ring ); + assert( ret == SUCCESS ); + } else { + old_r_dot_z = r_dot_z; + // r_dot_z = r' * z + r_dot_z = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot< descr >( r_dot_z, r, z, ring ); + assert( ret == SUCCESS ); + + beta = r_dot_z / old_r_dot_z; + // Ap = 0 + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); + assert( ret == SUCCESS ); + // Ap += beta * p + ret = ret ? 
ret : grb::eWiseMul< descr >( Ap, beta, p, ring ); + assert( ret == SUCCESS ); + // Ap = Ap + z + ret = ret ? ret : grb::eWiseApply< descr >( Ap, Ap, z, ring.getAdditiveOperator() ); + assert( ret == SUCCESS ); + // p = Ap + std::swap( Ap, p ); + assert( ret == SUCCESS ); + } + dbg_logger << ">>> middle p: " << p << std::endl; + + // Ap = A * p + ret = ret ? ret : grb::set< descr >( Ap, io_zero ); + ret = ret ? ret : grb::mxv< descr >( Ap, A, p, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> middle Ap: " << Ap << std::endl; + + // pAp = p' * Ap + ResidualType pAp = ring.template getZero< ResidualType >(); + ret = ret ? ret : grb::dot< descr >( pAp, Ap, p, ring ); + assert( ret == SUCCESS ); + + ResidualType alpha = r_dot_z / pAp; + // x += alpha * p + ret = ret ? ret : grb::eWiseMul< descr >( x, alpha, p, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> end x: " << x << std::endl; + + // r += - alpha * Ap + ret = ret ? ret : grb::eWiseMul< descr >( r, -alpha, Ap, ring ); + assert( ret == SUCCESS ); + dbg_logger << ">>> end r: " << r << std::endl; + + // residual = r' * r + norm_residual = ring.template getZero< ResidualType >(); + ret = ret ? 
ret : grb::dot< descr >( norm_residual, r, r, ring ); + assert( ret == SUCCESS ); + + norm_residual = std::sqrt( norm_residual ); + + ++iter; + out_info.iterations = iter; + out_info.norm_residual = norm_residual; + } while( iter < max_iterations && + norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); + + return ret; } }; diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp index 6462e4019..ed580da3d 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_data.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -28,6 +28,7 @@ #include #include +#include namespace grb { @@ -49,9 +50,12 @@ namespace grb { */ template< typename IOType, - typename NonzeroType + typename NonzeroType, + typename TelTokenType > struct MultiGridData { + grb::utils::telemetry::Stopwatch< TelTokenType > mg_stopwatch; + grb::utils::telemetry::Stopwatch< TelTokenType > sm_stopwatch; const size_t level; ///< level of the grid (0 for the finest physical system) const size_t system_size; ///< size of the system, i.e. side of the #A system matrix grb::Matrix< NonzeroType > A; ///< system matrix @@ -62,9 +66,12 @@ namespace grb { * Construct a new multigrid data object from level information and system size. 
*/ MultiGridData( + const TelTokenType & _tt, size_t _level, size_t sys_size ) : + mg_stopwatch( _tt ), + sm_stopwatch( _tt ), level( _level ), system_size( sys_size ), A( sys_size, sys_size ), @@ -72,9 +79,10 @@ namespace grb { r( sys_size ) {} // for safety, disable copy semantics - MultiGridData( const MultiGridData< IOType, NonzeroType > & o ) = delete; + MultiGridData( const MultiGridData< IOType, NonzeroType, TelTokenType > & o ) = delete; - MultiGridData & operator=( const MultiGridData< IOType, NonzeroType > & ) = delete; + MultiGridData & operator=( + const MultiGridData< IOType, NonzeroType, TelTokenType > & ) = delete; grb::RC init_vectors( IOType zero ) { grb::RC rc = grb::set( z, zero ); diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 177027f3e..6ab53b469 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -33,133 +33,13 @@ #include #include -#include +#include #include "multigrid_data.hpp" namespace grb { namespace algorithms { - /** - * Multi-grid V cycle implementation to refine a given solution. - * - * A full multi-grid run goes through the following steps: - * - * 1. calls the pre-smoother to improve on the initial solution stored into \p mgiter_begin->z - * 2. coarsens the residual vector - * 3. recursively solves the coarser system - * 4. prolongs the coarser solution into the \p mgiter_begin->z - * 5. further smooths the solution wih a post-smoother call - * - * The algorithm moves across grid levels via the STL-like iterators \p mgiter_begin - * and \p mgiter_end and accesses the grid data via the former (using the operator \c * ): when - * \p mgiter_begin \c == \p mgiter_end , a smoothing round is invoked and the recursion halted. - * - * Failuers of GraphBLAS operations are handled by immediately stopping the execution - * and returning the failure code. 
- * - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam MGSysIterType type of the iterator across grid levels - * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various - * smoothing steps - * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening - * and prolongation - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * - * @param mgiter_begin iterator pointing to the current level of the multi-grid - * @param mgiter_end end iterator, indicating the end of the recursion - * @param smoother callable object to invoke the smoothing steps - * @param coarsener callable object to coarsen and prolong (between current and coarser grid levels) - * @param ring the ring to perform the operations on - * @param minus the \f$ - \f$ operator for vector subtractions - * @return grb::RC if the algorithm could correctly terminate, the error code of the first - * unsuccessful operation otherwise - */ - template < - Descriptor descr, - typename IOType, - typename NonzeroType, - typename MGSysIterType, - typename MGSmootherType, - typename CoarsenerType, - class Ring, - class Minus - > grb::RC multi_grid( - MGSysIterType mgiter_begin, - const MGSysIterType mgiter_end, - MGSmootherType &smoother, - CoarsenerType &coarsener, - const Ring &ring, - const Minus &minus - ) { - static_assert( std::is_base_of< MultiGridData< IOType, NonzeroType >, - typename std::decay< decltype( *mgiter_begin ) >::type >::value, "the iterator type MGSysIterType" - " must reference an object of type MultiGridData< IOType, NonzeroType >" ); - - RC ret = SUCCESS; - assert( mgiter_begin != mgiter_end ); - MultiGridData< IOType, NonzeroType > &finer_system = *mgiter_begin; - ++mgiter_begin; - -#ifdef HPCG_PRINT_STEPS - DBG_println( "mg 
BEGINNING {" ); -#endif - - // clean destination vector - ret = ret ? ret : grb::set< descr >( finer_system.z, ring. template getZero< IOType >() ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( finer_system.r, "initial r" ); -#endif - if( !( mgiter_begin != mgiter_end ) ) { - // compute one round of Gauss Seidel and return - ret = ret ? ret : smoother.nonrecursive_smooth( finer_system ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( finer_system.z, "smoothed z" ); - DBG_println( "} mg END" ); -#endif - return ret; - } - MultiGridData< IOType, NonzeroType > &coarser_system = *mgiter_begin; - - // pre-smoother - ret = ret ? ret : smoother.pre_smooth( finer_system ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( finer_system.z, "pre-smoothed z" ); -#endif - - ret = ret ? ret : coarsener.coarsen_residual( finer_system, coarser_system ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( coarser_system.r, "coarse r" ); -#endif - - ret = ret ? ret : multi_grid< descr, IOType, NonzeroType, MGSysIterType, - MGSmootherType, CoarsenerType, Ring, Minus >( mgiter_begin, mgiter_end, - smoother, coarsener, ring, minus ); - assert( ret == SUCCESS ); - - ret = ret ? ret : coarsener.prolong_solution( coarser_system, finer_system ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( finer_system.z, "prolonged z" ); -#endif - - // post-smoother - ret = ret ? ret : smoother.post_smooth( finer_system ); - assert( ret == SUCCESS ); -#ifdef HPCG_PRINT_STEPS - DBG_print_norm( finer_system.z, "post-smoothed z" ); - DBG_println( "} mg END" ); -#endif - - return ret; - } - /** * Callable object to invoke the V-cycle multi-grid algorithm, which also requires * a smoother and a coarsener object. 
@@ -178,50 +58,52 @@ namespace grb { * @tparam descr descriptors with statically-known data for computation and containers */ template< + typename MGTypes, typename MGSmootherType, typename CoarsenerType, - typename IOType, - typename NonzeroType, - class Ring, - class Minus, - Descriptor descr = descriptors::no_operation + typename TelTokenType, + Descriptor descr = descriptors::no_operation, + typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff > struct MultiGridRunner { + using self_t = MultiGridRunner< MGTypes, MGSmootherType, CoarsenerType, TelTokenType, descr >; + using IOType = typename MGTypes::IOType; + using NonzeroType = typename MGTypes::NonzeroType; + using Ring = typename MGTypes::Ring; + using Minus = typename MGTypes::Minus; + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; + using SmootherRunnerType = MGSmootherType; + using CoarsenerRunnerType = CoarsenerType; + static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); static_assert( std::is_default_constructible< Minus >::value, "cannot construct the Minus operator with default values" ); - static_assert( std::is_move_constructible< MGSmootherType >::value, - "MGSmootherType must be move-constructible"); - static_assert( std::is_move_constructible< CoarsenerType >::value, - "CoarsenerType must be move-constructible"); - - using MultiGridInputType = MultiGridData< IOType, NonzeroType >; // check the interface between HPCG and MG match static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, MultiGridInputType >::value, "input type of the Smoother kernel must match the input from Multi-Grid" ); - MGSmootherType smoother_runner; ///< object to run the smoother - CoarsenerType coarsener_runner; ///< object to run the coarsener + MGSmootherType & smoother_runner; ///< object to run the smoother + CoarsenerType & coarsener_runner; ///< object to run the coarsener + 
DbgOutputStreamType dbg_logger; + std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) - bool print_duration = false; ///< whether to print the duration of a full multi-grid call - grb::utils::Timer timer; Ring ring; ///< algebraic ring Minus minus; ///< minus operator // operator to extract the reference out of an std::unique_ptr object struct __extractor { - MultiGridInputType & operator()( + MultiGridInputType* operator()( typename std::vector< std::unique_ptr< MultiGridInputType > >::reference &ref ) { - return *ref.get(); + return ref.get(); } - const MultiGridInputType & operator()( + const MultiGridInputType* operator()( typename std::vector< std::unique_ptr< MultiGridInputType > >::const_reference &ref ) const { - return *ref.get(); + return ref.get(); } }; @@ -235,29 +117,129 @@ namespace grb { * smoother and coarsener. */ MultiGridRunner( - MGSmootherType &&_smoother_runner, - CoarsenerType &&_coarsener_runner - ) : smoother_runner( std::move( _smoother_runner ) ), - coarsener_runner( std::move( _coarsener_runner ) ) {} + MGSmootherType &_smoother_runner, + CoarsenerType &_coarsener_runner + ) : smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ) + { + static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); + } + + MultiGridRunner( + MGSmootherType &_smoother_runner, + CoarsenerType &_coarsener_runner, + DbgOutputStreamType & _dbg_logger + ) : smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ), + dbg_logger( _dbg_logger ) + {} /** * Operator to invoke a full multi-grid run starting from the given level. 
*/ inline grb::RC operator()( MultiGridInputType &system ) { - if( print_duration ) { - timer.reset(); - } - grb::RC ret = multi_grid< descr, IOType, NonzeroType, __unique_ptr_extractor, - MGSmootherType, CoarsenerType, Ring, Minus >( - __unique_ptr_extractor( system_levels.begin() += system.level ), - __unique_ptr_extractor( system_levels.end() ), - smoother_runner, coarsener_runner, ring, minus ); - if( print_duration ) { - double duration = timer.time(); - std::cout << " pre-conditioner (ms) "<< duration; + return this->operator()( __unique_ptr_extractor( system_levels.begin() += system.level ), + __unique_ptr_extractor( system_levels.end() ) ); + } + + inline grb::RC operator()( + __unique_ptr_extractor begin, + const __unique_ptr_extractor end + ) { + begin->mg_stopwatch.start(); + grb::RC ret = multi_grid( begin, end ); + begin->mg_stopwatch.stop(); + return ret; + } + + /** + * Multi-grid V cycle implementation to refine a given solution. + * + * A full multi-grid run goes through the following steps: + * + * 1. calls the pre-smoother to improve on the initial solution stored into \p mgiter_begin->z + * 2. coarsens the residual vector + * 3. recursively solves the coarser system + * 4. prolongs the coarser solution into the \p mgiter_begin->z + * 5. further smooths the solution wih a post-smoother call + * + * The algorithm moves across grid levels via the STL-like iterators \p mgiter_begin + * and \p mgiter_end and accesses the grid data via the former (using the operator \c * ): when + * \p mgiter_begin \c == \p mgiter_end , a smoothing round is invoked and the recursion halted. + * + * Failuers of GraphBLAS operations are handled by immediately stopping the execution + * and returning the failure code. 
+ * + * @tparam descr descriptor for static information + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam MGSysIterType type of the iterator across grid levels + * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various + * smoothing steps + * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening + * and prolongation + * @tparam Ring the ring of algebraic operators zero-values + * @tparam Minus the minus operator for subtractions + * + * @param mgiter_begin iterator pointing to the current level of the multi-grid + * @param mgiter_end end iterator, indicating the end of the recursion + * @param smoother callable object to invoke the smoothing steps + * @param coarsener callable object to coarsen and prolong (between current and coarser grid levels) + * @param ring the ring to perform the operations on + * @param minus the \f$ - \f$ operator for vector subtractions + * @return grb::RC if the algorithm could correctly terminate, the error code of the first + * unsuccessful operation otherwise + */ + grb::RC multi_grid( + __unique_ptr_extractor mgiter_begin, + const __unique_ptr_extractor mgiter_end + ) { + RC ret = SUCCESS; + assert( mgiter_begin != mgiter_end ); + MultiGridInputType &finer_system = *mgiter_begin; + ++mgiter_begin; + + dbg_logger << "mg BEGINNING {" << std::endl; + + // clean destination vector + ret = ret ? ret : grb::set< descr >( finer_system.z, ring. template getZero< IOType >() ); + dbg_logger << ">>> initial r: " << finer_system.r << std::endl; + + if( !( mgiter_begin != mgiter_end ) ) { + // compute one round of Gauss Seidel and return + ret = ret ? 
ret : smoother_runner.nonrecursive_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> smoothed z: " << finer_system.z << std::endl; + dbg_logger << "} mg END" << std::endl; + return ret; } + MultiGridInputType &coarser_system = *mgiter_begin; + + // pre-smoother + ret = ret ? ret : smoother_runner.pre_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> pre-smoothed z: " << finer_system.z << std::endl; + + ret = ret ? ret : coarsener_runner.coarsen_residual( finer_system, coarser_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> coarse r: " << coarser_system.r << std::endl; + + ret = ret ? ret : this->operator()( mgiter_begin, mgiter_end ); + assert( ret == SUCCESS ); + + ret = ret ? ret : coarsener_runner.prolong_solution( coarser_system, finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> prolonged z: " << finer_system.z << std::endl; + + // post-smoother + ret = ret ? ret : smoother_runner.post_smooth( finer_system ); + assert( ret == SUCCESS ); + dbg_logger << ">>> post-smoothed z: " << finer_system.z << std::endl; + dbg_logger << "} mg END" << std::endl; + return ret; } + }; } // namespace algorithms diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 3193b46fe..02d0c5dd4 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -59,64 +59,125 @@ namespace grb { } }; - namespace internal { + /** + * Runner object for the RBGS smoother, with multiple methods for each type of smoothing step: + * pre-, post- and non-recursive, as invoked during a full run of a multi-grid V-cycle. + * + * It stores the information to smooth each level of the grid, to be initalized separately. 
+ * + * @tparam IOType type of result and intermediate vectors used during computation + * @tparam NonzeroType type of matrix values + * @tparam Ring the ring of algebraic operators + * @tparam descr descriptors with statically-known data for computation and containers + */ + template < + class SmootherTypes, + typename TelTokenType, + Descriptor descr = descriptors::no_operation + > struct RedBlackGSSmootherRunner { + + using IOType = typename SmootherTypes::IOType; + using NonzeroType = typename SmootherTypes::NonzeroType; + using Ring = typename SmootherTypes::Ring; + using SmootherInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; + using SmootherDataType = SmootherData< IOType >; + + size_t presmoother_steps = 1UL; ///< number of pre-smoother steps + size_t postsmoother_steps = 1UL; ///< number of post-smoother steps + size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level + std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, + ///< the smoothing data (finest first) + Ring ring; ///< the algebraic ring + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring operator with default values" ); + + + + inline grb::RC pre_smooth( SmootherInputType& data ) { + return run_smoother( data, presmoother_steps ); + } + + inline grb::RC post_smooth( SmootherInputType& data ) { + return run_smoother( data, postsmoother_steps ); + } + + inline grb::RC nonrecursive_smooth( SmootherInputType& data ) { + return run_smoother( data, non_recursive_smooth_steps ); + } + + protected: /** - * Runs a single step of Red-Black Gauss-Seidel for a specific color. + * Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, + * with inputs and outputs stored inside \p data. 
* - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values + * This is an internal method called by all user-facing methods, because this specific + * smoother performs all smoothing steps the same way. + */ + grb::RC run_smoother( + SmootherInputType &data, + const size_t smoother_steps + ) { + RC ret = SUCCESS; + + SmootherDataType &smoothing_info = *( levels.at( data.level ).get() ); + + data.sm_stopwatch.start(); + for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { + ret = ret ? ret : red_black_gauss_seidel( data, smoothing_info ); + assert( ret == SUCCESS ); + } + data.sm_stopwatch.stop(); + return ret; + } + + /** + * Runs a single step of Red-Black Gauss-Seidel for a specific color. * * @param[in] A the system matrix * @param[in] A_diagonal a vector storing the diagonal elements of \p A * @param[in] r the residual - * @param[in,out] x the initial solution to start from, and where the smoothed solution is stored to + * @param[in,out] z the initial solution to start from, and where the smoothed solution is stored to * @param[out] smoother_temp a vector for temporary values * @param[in] color_mask the mask of colors to filter the rows to smooth - * @param[in] ring the ring to perform the operations on * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< - Descriptor descr, - typename IOType, - typename NonzeroType, - class Ring - > grb::RC rbgs_single_step( - const grb::Matrix< NonzeroType > & A, - const grb::Vector< IOType > & A_diagonal, - const grb::Vector< IOType > & r, - grb::Vector< IOType > & x, - grb::Vector< IOType > & smoother_temp, - const grb::Vector< bool > & color_mask, - const Ring & ring + grb::RC red_black_gauss_seidel_single_step( + SmootherInputType 
&data, + SmootherDataType &smoothing_info, + size_t color ) { - RC ret = SUCCESS; - - // smoother_temp[color_mask] = A[color_mask] * x[color_mask] + const grb::Matrix< NonzeroType > & A = data.A; + const grb::Vector< IOType > & A_diagonal = smoothing_info.A_diagonal; + const grb::Vector< IOType > & r = data.r; + grb::Vector< IOType > & z = data.z; + grb::Vector< IOType > & smoother_temp = smoothing_info.smoother_temp; + const grb::Vector< bool > & color_mask = smoothing_info.color_masks[ color ]; + + // smoother_temp[color_mask] = A[color_mask] * z[color_mask] // use the structural descriptors, assuming ONLY the values of the current color are set // note that if this assumption does not hold, also the following eWiseLambda() is wrong - ret = ret ? ret : grb::mxv< grb::descriptors::safe_overlap | grb::descriptors::structural >( - smoother_temp, color_mask, A, x, ring ); + RC ret = grb::mxv< grb::descriptors::safe_overlap | grb::descriptors::structural >( + smoother_temp, color_mask, A, z, ring ); assert( ret == SUCCESS ); // TODO internal issue #201 // Replace below with masked calls: - // x[mask] = r[mask] - smoother_temp[mask] + x[mask] .* diagonal[mask] - // x[mask] = x[maks] ./ diagonal[mask] + // z[mask] = r[mask] - smoother_temp[mask] + z[mask] .* diagonal[mask] + // z[mask] = z[maks] ./ diagonal[mask] ret = ret ? 
ret : grb::eWiseLambda( - [ &x, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { + [ &z, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { // if the mask was properly initialized, the check on the mask value is unnecessary; // if( color_mask[ i ] ) { IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + x[ i ] * d; - x[ i ] = v / d; + IOType v = r[ i ] - smoother_temp[ i ] + z[ i ] * d; + z[ i ] = v / d; // } }, - color_mask, x, r, smoother_temp, A_diagonal ); + color_mask, z, r, smoother_temp, A_diagonal ); assert( ret == SUCCESS ); return ret; } @@ -131,25 +192,13 @@ namespace grb { * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic * to pass correct coloring information. Otherwise, \b no guarantees hold on the result. * - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * * @param[in,out] data structure with the data of a single grid level - * @param[in] ring the ring to perform the operations on * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< - Descriptor descr, - typename IOType, - typename NonzeroType, - class Ring - > grb::RC red_black_gauss_seidel( - MultiGridData< IOType, NonzeroType > &data, - SmootherData< IOType > &smoothing_info, - const Ring & ring + grb::RC red_black_gauss_seidel( + SmootherInputType &data, + SmootherDataType &smoothing_info ) { RC ret = SUCCESS; // zero the temp output just once, assuming proper masking avoids @@ -158,88 +207,16 @@ namespace grb { ring. 
template getZero< IOType >() ); // forward step - using cit_t = typename std::vector< grb::Vector< bool > >::const_iterator; - cit_t end = smoothing_info.color_masks.cend(); - for( cit_t it = smoothing_info.color_masks.cbegin(); it != end && ret == SUCCESS; ++it ) { - ret = rbgs_single_step< descr >( data.A, smoothing_info.A_diagonal, data.r, - data.z, smoothing_info.smoother_temp, *it, ring ); + for( size_t color = 0; color < smoothing_info.color_masks.size(); ++color ) { + ret = red_black_gauss_seidel_single_step( data, smoothing_info, color ); } ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, ring. template getZero< IOType >() ); // backward step - using crit_t = typename std::vector< grb::Vector< bool > >::const_reverse_iterator; - crit_t rend = smoothing_info.color_masks.crend(); - for( crit_t rit = smoothing_info.color_masks.crbegin(); rit != rend && ret == SUCCESS; ++rit ) { - ret = rbgs_single_step< descr >( data.A, smoothing_info.A_diagonal, data.r, - data.z, smoothing_info.smoother_temp, *rit, ring ); - } - return ret; - } - - } // namespace internal - - /** - * Runner object for the RBGS smoother, with multiple methods for each type of smoothing step: - * pre-, post- and non-recursive, as invoked during a full run of a multi-grid V-cycle. - * - * It stores the information to smooth each level of the grid, to be initalized separately. 
- * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators - * @tparam descr descriptors with statically-known data for computation and containers - */ - template < - typename IOType, - typename NonzeroType, - class Ring, - Descriptor descr = descriptors::no_operation - > struct RedBlackGSSmootherRunner { - - size_t presmoother_steps; ///< number of pre-smoother steps - size_t postsmoother_steps; ///< number of post-smoother steps - size_t non_recursive_smooth_steps; ///< number of smoother steps for the last grid level - std::vector< std::unique_ptr< SmootherData< IOType > > > levels; ///< for each grid level, - ///< the smoothing data (finest first) - Ring ring; ///< the algebraic ring - - static_assert( std::is_default_constructible< Ring >::value, - "cannot construct the Ring operator with default values" ); - - using SmootherInputType = MultiGridData< IOType, NonzeroType >; - - inline grb::RC pre_smooth( SmootherInputType& data ) { - return __run_smoother( data, presmoother_steps ); - } - - inline grb::RC post_smooth( SmootherInputType& data ) { - return __run_smoother( data, postsmoother_steps ); - } + for( size_t color = smoothing_info.color_masks.size(); color > 0; --color ) { + ret = red_black_gauss_seidel_single_step( data, smoothing_info, color - 1 ); - inline grb::RC nonrecursive_smooth( SmootherInputType& data ) { - return __run_smoother( data, non_recursive_smooth_steps ); - } - - /** - * Runs \p smoother_steps iteration of the Red-Black Gauss-Seidel smoother, - * with inputs and outputs stored inside \p data. - * - * This is an internal method called by all user-facing methods, because this specific - * smoother performs all smoothing steps the same way. 
- */ - grb::RC __run_smoother( - SmootherInputType &data, - const size_t smoother_steps - ) { - RC ret = SUCCESS; - - SmootherData< IOType > &smoothing_info = *( levels.at( data.level ).get() ); - - for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { - ret = ret ? ret : internal::red_black_gauss_seidel< descr >( - data, smoothing_info, ring ); - assert( ret == SUCCESS ); } return ret; } diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index f2b008e6f..3d1fee648 100644 --- a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -61,39 +61,82 @@ namespace grb { } }; - namespace internal { + /** + * Runner structure, holding the data to coarsen the levels of a multi-grid simulation. + * + * This coarsener just uses the same matrix to perform the coarsening (via an mxv()) + * and the prolongation, using it transposed. + */ + template< + class CoarsenerTypes, + typename TelTokenType, + Descriptor descr = descriptors::no_operation + > struct SingleMatrixCoarsener { + + using IOType = typename CoarsenerTypes::IOType; + using NonzeroType = typename CoarsenerTypes::NonzeroType; + using Ring = typename CoarsenerTypes::Ring; + using Minus = typename CoarsenerTypes::Minus; + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; + using CoarseningDataType = CoarseningData< IOType, NonzeroType >; + + static_assert( std::is_default_constructible< Ring >::value, + "cannot construct the Ring with default values" ); + static_assert( std::is_default_constructible< Minus >::value, + "cannot construct the Minus operator with default values" ); + + /** + * Data to coarsen each level, from finer to coarser. 
+ */ + std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, + NonzeroType > > > coarsener_levels; + Ring ring; + Minus minus; + + /** + * Method required by MultiGridRunner before the recursive call, to coarsen + * the residual vector of \p finer (the finer system) into the residual of + * \p coarser (the coarser system). + */ + inline grb::RC coarsen_residual( + const MultiGridInputType &finer, + MultiGridInputType &coarser + ) { + // first compute the residual + CoarseningData< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; + grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring. template getZero< IOType >() ); + ret = ret ? ret : grb::mxv< descr >( coarsener.Ax_finer, finer.A, finer.z, ring ); + + return ret ? ret : compute_coarsening( finer.r, coarser.r, coarsener ); + } + /** + * Method required by MultiGridRunner after the recursive call, to "prolong" the coarser solution + * into the finer solution. + */ + inline grb::RC prolong_solution( + const MultiGridInputType &coarser, + MultiGridInputType &finer + ) { + return compute_prolongation( coarser.z, finer.z, *coarsener_levels[ finer.level ] ); + } + + protected: /** * computes the coarser residual vector \p CoarseningData.r by coarsening * \p coarsening_data.Ax_finer - \p r_fine via \p coarsening_data.coarsening_matrix. * * The coarsening information are stored inside \p CoarseningData. 
* - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * * @param[in] r_fine fine residual vector * @param[in,out] coarsening_data \ref MultiGridData data structure storing the information for coarsening - * @param[in] ring the ring to perform the operations on - * @param[in] minus the \f$ - \f$ operator for vector subtractions * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< - Descriptor descr, - typename IOType, - typename NonzeroType, - class Ring, - class Minus - > grb::RC compute_coarsening( + grb::RC compute_coarsening( const grb::Vector< IOType > & r_fine, // fine residual grb::Vector< IOType > & r_coarse, // fine residual - CoarseningData< IOType, NonzeroType > & coarsening_data, - const Ring & ring, - const Minus & minus + CoarseningData< IOType, NonzeroType > & coarsening_data ) { RC ret = SUCCESS; ret = ret ? ret : grb::eWiseApply< descr >( coarsening_data.Ax_finer, r_fine, @@ -110,104 +153,33 @@ namespace grb { /** * computes the prolongation of the coarser solution \p coarsening_data.z and stores it into - * \p x_fine. + * \p z_fine. * * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
* - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators zero-values - * - * @param[out] x_fine the solution vector to store the prolonged solution into + * @param[out] z_fine the solution vector to store the prolonged solution into * @param[in,out] coarsening_data information for coarsening - * @param[in] ring the ring to perform the operations on * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ - template< - Descriptor descr, - typename IOType, - typename NonzeroType, - class Ring - > grb::RC compute_prolongation( + grb::RC compute_prolongation( const grb::Vector< IOType > & z_coarse, - grb::Vector< IOType > & x_fine, // fine residual - grb::algorithms::CoarseningData< IOType, NonzeroType > & coarsening_data, - const Ring & ring + grb::Vector< IOType > & z_fine, // fine residual + grb::algorithms::CoarseningData< IOType, NonzeroType > & coarsening_data ) { RC ret = SUCCESS; // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 - // to nrows(x_fine) + // to nrows(z_fine) ret = ret ? ret : grb::set< descr >( coarsening_data.Ax_finer, ring.template getZero< IOType >() ); ret = ret ? ret : grb::mxv< descr | grb::descriptors::transpose_matrix >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, z_coarse, ring ); assert( ret == SUCCESS ); - ret = ret ? ret : grb::foldl< descr >( x_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // x_fine += Ax_finer; + ret = ret ? ret : grb::foldl< descr >( z_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // z_fine += Ax_finer; assert( ret == SUCCESS ); return ret; } - - } // namespace internal - - /** - * Runner structure, holding the data to coarsen the levels of a multi-grid simulation. 
- * - * This coarsener just uses the same matrix to perform the coarsening (via an mxv()) - * and the prolongation, using it transposed. - */ - template< - typename IOType, - typename NonzeroType, - class Ring, - class Minus, - Descriptor descr = descriptors::no_operation - > struct SingleMatrixCoarsener { - - static_assert( std::is_default_constructible< Ring >::value, - "cannot construct the Ring with default values" ); - static_assert( std::is_default_constructible< Minus >::value, - "cannot construct the Minus operator with default values" ); - - using MultiGridInputType = MultiGridData< IOType, NonzeroType >; - - /** - * Data to coarsen each level, from finer to coarser. - */ - std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, - NonzeroType > > > coarsener_levels; - Ring ring; - Minus minus; - - /** - * Method required by MultiGridRunner before the recursive call, to coarsen - * the residual vector of \p finer (the finer system) into the residual of - * \p coarser (the coarser system). - */ - inline grb::RC coarsen_residual( - const MultiGridInputType &finer, - MultiGridInputType &coarser - ) { - // first compute the residual - CoarseningData< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; - grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring. template getZero< IOType >() ); - ret = ret ? ret : grb::mxv< descr >( coarsener.Ax_finer, finer.A, finer.z, ring ); - - return internal::compute_coarsening< descr >( finer.r, coarser.r, coarsener, ring, minus ); - } - - /** - * Method required by MultiGridRunner after the recursive call, to "prolong" the coarser solution - * into the finer solution. 
- */ - inline grb::RC prolong_solution( - const MultiGridInputType &coarser, - MultiGridInputType &finer - ) { - return internal::compute_prolongation< descr >( coarser.z, finer.z, *coarsener_levels[ finer.level ], ring ); - } }; } // namespace algorithms diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index f7cd05787..adba0339a 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -31,33 +31,23 @@ #include #include #include +#include +#include +#include +#include #include -//========== TRACE SOLVER STEPS ========= -// to easily trace the steps of the solver, just define this symbol -// #define HPCG_PRINT_STEPS +#include +#include +#include +#include -// here we define a custom macro, which enables tracing only for HPCG code -#ifdef HPCG_PRINT_STEPS -#include - -// HPCG_PRINT_STEPS requires defining the following symbols - -// prints args on a dedicated line -#define DBG_println( args ) std::cout << args << std::endl; -// forward declaration for the tracing facility -template< typename T > void print_norm( const grb::Vector< T > &r, const char * head ); -// prints head and the norm of r -#define DBG_print_norm( vec, head ) print_norm( vec, head ) -#endif -//============================================ - -#include #include #include #include +#include #include #include @@ -84,33 +74,88 @@ using namespace grb; using namespace algorithms; static const char * const TEXT_HIGHLIGHT = "===> "; -#define thcout ( std::cout << TEXT_HIGHLIGHT ) -#define thcerr ( std::cerr << TEXT_HIGHLIGHT ) -#define MASTER_PRINT( pid, txt ) if( pid == 0 ) { std::cout << txt; } // default types -using IOType = double; -using NonzeroType = double; -using InputType = double; -using ResidualType = double; -using StdRing = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, - grb::identities::zero, grb::identities::one >; -using StdMinus = operators::subtract< NonzeroType >; +using value_t = double; + +struct HPCGTypes { + using IOType = value_t; + 
using NonzeroType = value_t; + using InputType = value_t; + using ResidualType = value_t; + using Ring = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, + grb::identities::zero, grb::identities::one >; + using Minus = operators::subtract< NonzeroType >; + using Divide = operators::divide< NonzeroType >; +}; + +using IOType = typename HPCGTypes::IOType; +using NonzeroType = typename HPCGTypes::NonzeroType; +using InputType = typename HPCGTypes::InputType; +using ResidualType = typename HPCGTypes::ResidualType; +using Ring = typename HPCGTypes::Ring; + using coord_t = size_t; + constexpr Descriptor hpcg_desc = descriptors::dense; +DECLARE_TELEMETRY_TOKEN( DistOut ) +ACTIVATE_TOKEN( DistOut ) +using dist_token_t = TELEMETRY_TOKEN_TYPE( DistOut ); +using DistStream = grb::utils::telemetry::OutputStream< dist_token_t >; + +DECLARE_TELEMETRY_TOKEN( HPCGTelemetry ) +ACTIVATE_TOKEN( HPCGTelemetry ) +using hpcg_token_t = TELEMETRY_TOKEN_TYPE( HPCGTelemetry ); + +DECLARE_TELEMETRY_TOKEN( MGTelemetry ) +ACTIVATE_TOKEN( MGTelemetry ) +using mg_token_t = TELEMETRY_TOKEN_TYPE( MGTelemetry ); + +DECLARE_TELEMETRY_TOKEN( DBGToken ) +// ACTIVATE_TOKEN( DBGToken ) +using dbg_token_t = TELEMETRY_TOKEN_TYPE( DBGToken ); +using DBGStream = grb::utils::telemetry::OutputStream< dbg_token_t >; + +using duration_t = utils::telemetry::duration_nano_t; +using hpcg_csv_t = utils::telemetry::CSVWriter< hpcg_token_t, hpcg_token_t::enabled, size_t, duration_t >; +using mg_csv_t = utils::telemetry::CSVWriter< mg_token_t, mg_token_t::enabled, size_t, size_t, duration_t, duration_t >; + // assembled types for simulation runners and input/output structures -using hpcg_runner_t = HPCGRunnerType< hpcg_desc, IOType, NonzeroType, InputType, ResidualType, - StdRing, StdMinus >; -using mg_data_t = MultiGridData< IOType, NonzeroType >; -using coarsening_data_t = CoarseningData< IOType, NonzeroType >; -using smoothing_data_t = SmootherData< IOType >; -using hpcg_data_t = 
MultiGridCGData< IOType, NonzeroType, InputType >; - -static const IOType io_zero = StdRing(). template getZero< IOType >(); -static const NonzeroType nz_zero = StdRing(). template getZero< NonzeroType >(); -static const InputType input_zero = StdRing(). template getZero< InputType >(); -static const ResidualType residual_zero = StdRing(). template getZero< ResidualType >(); +using smoother_runner_t = grb::algorithms::RedBlackGSSmootherRunner< HPCGTypes, mg_token_t, hpcg_desc >; +using smoothing_data_t = typename smoother_runner_t::SmootherDataType; + +using coarsener_runner_t = grb::algorithms::SingleMatrixCoarsener< HPCGTypes, mg_token_t, hpcg_desc >; +using coarsening_data_t = typename coarsener_runner_t::CoarseningDataType; + +using mg_runner_t = MultiGridRunner< HPCGTypes, smoother_runner_t, coarsener_runner_t, mg_token_t, hpcg_desc, DBGStream >; +using mg_data_t = typename mg_runner_t::MultiGridInputType; + +using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_token_t, hpcg_desc, DBGStream >; +using hpcg_data_t = typename hpcg_runner_t::HPCGInputType; + +struct dotter : grb::utils::telemetry::OutputStreamLazy { + const grb::Vector< IOType > &v; + dotter( const grb::Vector< IOType > &_v ) : v( _v ) {} + ResidualType operator()() const { + Ring ring; + ResidualType r = 0; + grb::dot( r, v, v, ring ); + return r; + } +}; + +static inline DBGStream & operator<<( DBGStream & stream, const grb::Vector< IOType > & v ) { + stream << std::setprecision( 7 ); + return stream << dotter( v ); +} + +static const IOType io_zero = Ring(). template getZero< IOType >(); +static const NonzeroType nz_zero = Ring(). template getZero< NonzeroType >(); +static const InputType input_zero = Ring(). template getZero< InputType >(); +static const ResidualType residual_zero = Ring(). template getZero< ResidualType >(); + +static constexpr size_t MAX_CSV_PATH_LENGTH = 255; /** * Container for the parameters for the HPCG simulation. 
@@ -126,7 +171,18 @@ struct simulation_input { size_t smoother_steps; bool evaluation_run; bool no_preconditioning; - bool print_iter_stats; + // logging options: these are serializable for launcher invocation + std::array< char, MAX_CSV_PATH_LENGTH + 1 > hpcg_csv; + std::array< char, MAX_CSV_PATH_LENGTH + 1 > mg_csv; + bool hpcg_log; + bool mg_log; + + simulation_input() { + hpcg_csv[ 0 ] = '\0'; + mg_csv[ 0 ] = '\0'; + } + + simulation_input( const simulation_input & ) = default; }; /** @@ -155,35 +211,6 @@ static void print_system( } #endif -//========== ROUTINES TO TRACE SOLVER STEPS ========= -#ifdef HPCG_PRINT_STEPS -template< - typename T, - class Ring -> void print_norm( const grb::Vector< T > & r, const char * head, const Ring & ring ) { - T norm = ring. template getZero< T >(); - RC ret = grb::dot( norm, r, r, ring ); // norm = r' * r; - (void)ret; - assert( ret == SUCCESS ); - if( spmd<>::pid() != 0 ) { - return; - } - // printf makes more likely to get single lineas in output with multiple processes - // additionally, it doesn't approximate double values - if( head != nullptr ) { - printf(">>> %s: %lf\n", head, norm ); - } else { - printf(">>> %lf\n", norm ); - } -} - -template< typename T > void print_norm( const grb::Vector< T > & r, const char * head ) { - return print_norm( r, head, StdRing() ); -} -#endif -//============================================ - - /** * Allocates the data structure input to the various simulation steps (CG, multi-grid, coarsening, smoothing) * for each level of the multi-grid. The input is the vector of system sizes \p mg_sizes, with sizes in @@ -212,25 +239,26 @@ template< typename T > T static next_pow_2( T n ) { * explained in \ref multigrid_allocate_data(). 
*/ static void allocate_system_structures( - const std::vector< size_t > &mg_sizes, std::vector< std::unique_ptr< mg_data_t > > &system_levels, std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &cg_system_data + std::unique_ptr< hpcg_data_t > &cg_system_data, + const std::vector< size_t > &mg_sizes, + const mg_token_t & mg_token, + DistStream & logger ) { - const size_t pid = spmd<>::pid() ; grb::utils::Timer timer; hpcg_data_t *data = new hpcg_data_t( mg_sizes[ 0 ] ); cg_system_data = std::unique_ptr< hpcg_data_t >( data ); - MASTER_PRINT( pid, "allocating data for the MultiGrid simulation..."); + logger << "allocating data for the MultiGrid simulation..."; timer.reset(); - multigrid_allocate_data( mg_sizes, system_levels, coarsener_levels, smoother_levels ); + multigrid_allocate_data( system_levels, coarsener_levels, smoother_levels, mg_sizes, mg_token ); double time = timer.time(); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + logger << " time (ms) " << time << std::endl; // zero all vectors - MASTER_PRINT( pid, "zeroing all vectors..."); + logger << "zeroing all vectors..."; timer.reset(); grb::RC rc = data->init_vectors( io_zero ); ASSERT_RC_SUCCESS( rc ); @@ -241,23 +269,25 @@ static void allocate_system_structures( std::for_each( smoother_levels.begin(), smoother_levels.end(), []( std::unique_ptr< smoothing_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); time = timer.time(); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ); + logger << " time (ms) " << time << std::endl; } + /** * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. * It allocates the data structures and populates them according to the algorithms chosen for HPCG. 
*/ static void build_3d_system( - const simulation_input & in, std::vector< std::unique_ptr< mg_data_t > > &system_levels, std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &cg_system_data + std::unique_ptr< hpcg_data_t > &cg_system_data, + const simulation_input & in, + const mg_token_t & tt, + DistStream & logger ) { constexpr size_t DIMS = 3; using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; - const size_t pid = spmd<>::pid(); grb::utils::Timer timer; HPCGSystemParams< DIMS, NonzeroType > params = { @@ -266,22 +296,21 @@ static void build_3d_system( }; std::vector< builder_t > mg_generators; - MASTER_PRINT( pid, "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) - << " levels..." ); + logger << "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..."; timer.reset(); // construct the builder_t generator for each grid level, which depends on the system physics hpcg_build_multigrid_generators( params, mg_generators ); double time = timer.time(); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ); - MASTER_PRINT( pid, "built HPCG generators for " << mg_generators.size() - << " levels" << std::endl ); + logger << " time (ms) " << time << std::endl; + logger << "built HPCG generators for " << mg_generators.size() + << " levels" << std::endl; // extract the size for each level std::vector< size_t > mg_sizes; std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), [] ( const builder_t &b ) { return b.system_size(); } ); // given the sizes, allocate the data structures for all the inputs of the algorithms - allocate_system_structures( mg_sizes, system_levels, coarsener_levels, smoother_levels, cg_system_data ); + allocate_system_structures( system_levels, coarsener_levels, smoother_levels, cg_system_data, mg_sizes, tt, logger ); 
assert( mg_generators.size() == system_levels.size() ); assert( mg_generators.size() == smoother_levels.size() ); assert( mg_generators.size() - 1 == coarsener_levels.size() ); // coarsener acts between two levels @@ -289,29 +318,29 @@ static void build_3d_system( // for each grid level, populate the data structures according to the specific algorithm // and track the time for diagnostics purposes for( size_t i = 0; i < mg_generators.size(); i++) { - MASTER_PRINT( pid, "SYSTEM LEVEL " << i << std::endl ); + logger << "SYSTEM LEVEL " << i << std::endl; auto& sizes = mg_generators[ i ].get_generator().get_sizes(); - MASTER_PRINT( pid, " sizes: " ); + logger << " sizes: "; for( size_t s = 0; s < DIMS - 1; s++ ) { - MASTER_PRINT( pid,sizes[ s ] << " x " ); + logger <A ); + grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], system_levels.at(i)->A, logger ); time = timer.time(); ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + logger << " time (ms) " << time << std::endl; - MASTER_PRINT( pid, " populating smoothing data: " ); + logger << " populating smoothing data: "; timer.reset(); - rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ] ); + rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ], logger ); time = timer.time(); ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + logger << " time (ms) " << time << std::endl; if( i > 0 ) { - MASTER_PRINT( pid, " populating coarsening data: " ); + logger << " populating coarsening data: "; timer.reset(); if( !in.use_average_coarsener ) { rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); @@ -320,11 +349,12 @@ static void build_3d_system( } time = timer.time(); ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, " time (ms) " << time << std::endl ) + logger << " time (ms) " << time << std::endl; } } } + /** * Main test, building an HPCG problem and 
running the simulation closely following the * parameters in the reference HPCG test. @@ -333,25 +363,44 @@ void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID const size_t pid = spmd<>::pid(); grb::utils::Timer timer; - MASTER_PRINT( pid, "beginning input generation..." << std::endl ); + + dist_token_t dist( pid == 0 ); + class MyNumPunct : public std::numpunct { + // protected: + char do_thousands_sep() const override { return '\''; } + std::string do_grouping() const override { return "\03"; } + }; + std::locale old_locale = std::cout.imbue( std::locale( std::cout.getloc(), new MyNumPunct ) ); + DistStream logger( dist, std::cout ); + + logger << "beginning input generation..." << std::endl; // wrap hpcg_data inside a unique_ptr to forget about cleaning chores std::unique_ptr< hpcg_data_t > hpcg_state; + // log HPCG by default on master + hpcg_token_t hpcg_token( pid == 0 ); + // log Mg and smoother only if the user requested it + mg_token_t mg_token( pid == 0 && in.mg_log ); + + dbg_token_t dbg_token( pid == 0 ); + DBGStream dbg_stream( dbg_token, std::cout ); + // define the main HPCG runner and initialize the options of its components - hpcg_runner_t hpcg_runner( build_hpcg_runner< hpcg_desc, IOType, NonzeroType, InputType, ResidualType, - StdRing, StdMinus >( in.smoother_steps ) ); - auto &mg_runner = hpcg_runner.mg_runner; - auto &coarsener = mg_runner.coarsener_runner; - auto &smoother = mg_runner.smoother_runner; - hpcg_runner.cg_opts.tolerance = residual_zero; - hpcg_runner.cg_opts.with_preconditioning = ! in.no_preconditioning; + coarsener_runner_t coarsener; + smoother_runner_t smoother; + smoother.presmoother_steps = smoother.postsmoother_steps = in.smoother_steps; + smoother.non_recursive_smooth_steps = 1UL; + mg_runner_t mg_runner( smoother, coarsener, dbg_stream ); + hpcg_runner_t hpcg_runner( hpcg_token, mg_runner, dbg_stream ); + hpcg_runner.tolerance = residual_zero; + hpcg_runner.with_preconditioning = ! 
in.no_preconditioning; timer.reset(); // build the entire multi-grid system - build_3d_system( in, mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state ); + build_3d_system( mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state, in, mg_token, logger ); double input_duration = timer.time(); - MASTER_PRINT( pid, "input generation time (ms): " << input_duration << std::endl ); + logger << "input generation time (ms): " << input_duration << std::endl; #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { @@ -367,7 +416,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { // set vectors as from standard HPCG benchmark set( x, 1.0 ); set( b, nz_zero ); - rc = grb::mxv( b, A, x, StdRing() ); + rc = grb::mxv( b, A, x, Ring() ); set( x, io_zero ); #ifdef HPCG_PRINT_SYSTEM @@ -382,32 +431,43 @@ void grbProgram( const simulation_input & in, struct output & out ) { mg_data_t &grid_base = *mg_runner.system_levels[ 0 ]; // do a cold run to warm the system up - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning cold run..." << std::endl ); - hpcg_runner.cg_opts.max_iterations = 1; + logger << TEXT_HIGHLIGHT << "beginning cold run..." << std::endl; + hpcg_runner.max_iterations = 1; timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); double iter_duration = timer.time(); ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, " time (ms): " << iter_duration << std::endl ); + logger << " time (ms): " << iter_duration << std::endl; // restore CG options to user-given values - hpcg_runner.cg_opts.max_iterations = in.max_iterations; - hpcg_runner.cg_opts.print_iter_residual = in.print_iter_stats; - mg_runner.print_duration = in.print_iter_stats; - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning solver..." << std::endl ); + hpcg_runner.max_iterations = in.max_iterations; + logger << TEXT_HIGHLIGHT << "beginning solver..." 
<< std::endl; out.inner_test_repetitions = 0; out.times.useful = 0.0; + + hpcg_csv_t hpcg_csv( hpcg_token, { "repetition", "time" } ); + mg_csv_t mg_csv( mg_token, { "repetition", "level", "mg time", "smoother time" } ); + // do benchmark for( size_t i = 0; i < in.inner_test_repetitions; ++i ) { rc = set( x, io_zero ); ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl ); + logger << TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl; timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); iter_duration = timer.time(); out.times.useful += iter_duration; ASSERT_RC_SUCCESS( rc ); - MASTER_PRINT( pid, "repetition,duration (ms): " << i << "," << iter_duration << std::endl ); + hpcg_csv.add_line( i, hpcg_runner.getElapsedNano() ); + logger << "repetition,duration (ns): " << hpcg_csv.last_line() << std::endl; + for( const auto & mg_level : mg_runner.system_levels ) { + mg_csv.add_line( i, mg_level->level, mg_level->mg_stopwatch.getElapsedNano(), + mg_level->sm_stopwatch.getElapsedNano() ); + mg_level->mg_stopwatch.reset(); + mg_level->sm_stopwatch.reset(); + } + hpcg_runner.reset(); + out.inner_test_repetitions++; } if( in.evaluation_run ) { @@ -417,8 +477,9 @@ void grbProgram( const simulation_input & in, struct output & out ) { } out.times.useful /= static_cast< double >( in.inner_test_repetitions ); - MASTER_PRINT( pid, TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions - << ", " << out.times.useful << std::endl ); + logger << TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions + << ", " << out.times.useful << std::endl; + std::cout.imbue( old_locale ); // start postamble timer.reset(); @@ -426,16 +487,26 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.error_code = rc; grb::set( b, 1.0 ); - grb::eWiseMul( b, -1.0, x, StdRing() ); + grb::eWiseMul( b, -1.0, x, Ring() ); out.square_norm_diff = nz_zero; - 
grb::dot( out.square_norm_diff, b, b, StdRing() ); + grb::dot( out.square_norm_diff, b, b, Ring() ); // output out.pinnedVector.reset( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); // finish timing out.times.postamble = timer.time(); + + // write measurements into CSV files + if ( in.hpcg_log ) { + hpcg_csv.write_to_file( in.hpcg_csv.data() ); + } + if ( in.mg_log ) { + mg_csv.write_to_file( in.mg_csv.data() ); + } } +#define thcout ( std::cout << TEXT_HIGHLIGHT ) + /** * Parser the command-line arguments to extract the simulation information and checks they are valid. */ @@ -456,7 +527,6 @@ int main( int argc, char ** argv ) { thcout << "Max iterations: " << sim_in.max_iterations << std::endl; thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; - thcout << "Print iteration residual: " << std::boolalpha << sim_in.print_iter_stats << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; thcout << "Maximum norm for residual: " << max_diff_norm << std::endl; @@ -500,19 +570,19 @@ int main( int argc, char ** argv ) { // check result vector, stored inside a pinned vector ASSERT_TRUE( out.pinnedVector ); const PinnedVector< double > &solution = *out.pinnedVector; - thcout << "Size of x is " << solution.size() << std::endl; - ASSERT_GT( solution.size(), 0 ); - print_vector( solution, 30, "SOLUTION" ); + ASSERT_EQ( solution.size(), sim_in.nx * sim_in.ny * sim_in.nz ); // check norm of solution w.r.t. expected solution (i.e. 
vector of all 1) double diff_norm = sqrt( out.square_norm_diff ); - thcout << "Norm of difference vector | - |: " << diff_norm << std::endl; + thcout << "Norm of difference vector: | - | = " << diff_norm << std::endl; ASSERT_LT( diff_norm, max_diff_norm ); thcout << "Test OK" << std::endl; return 0; } +static const char * const empty = ""; + static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, @@ -520,8 +590,9 @@ static void parse_arguments( int argc, char ** argv ) { - argument_parser parser; + const char * hpcg_csv, * mg_csv; + parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) .add_optional_argument( "--nz", sim_in.nz, PHYS_SYSTEM_SIZE_DEF, "physical system size along z" ) @@ -543,8 +614,10 @@ static void parse_arguments( "launch single run directly, without benchmarker (ignore repetitions)" ) .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) - .add_option( "--print-iter-stats", sim_in.print_iter_stats, false, - "on each iteration, print more statistics" ) + .add_optional_argument( "--hpcg-csv", hpcg_csv , empty, + "file for HPCG run measurements (overwrites any previous)" ) + .add_optional_argument( "--mg-csv", mg_csv , empty, + "file for Multigrid run measurements (overwrites any previous)" ) .add_option( "--use-average-coarsener", sim_in.use_average_coarsener, false, "coarsen by averaging instead of by sampling a single point (slower, but more accurate)" ); @@ -564,6 +637,7 @@ static void parse_arguments( std::exit( -1 ); } + // check sizes const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; for( size_t s : { sim_in.nx, sim_in.ny, sim_in.nz } ) { std::lldiv_t div_res = std::div( static_cast< long long >( s ), static_cast< long long >( max_system_divider ) ); @@ -582,4 +656,22 @@ static 
void parse_arguments( std::exit( -1 ); } } + + // check output CSVs + size_t len = std::strlen( hpcg_csv ); + if( ( sim_in.hpcg_log = len > 0 ) ) { + if ( len > MAX_CSV_PATH_LENGTH ) { + std::cerr << "HPCG CSV file name is too long!" << std::endl; + std::exit( -1 ); + } + std::strncpy( sim_in.hpcg_csv.data(), hpcg_csv, MAX_CSV_PATH_LENGTH ); + } + len = std::strlen( mg_csv ); + if( ( sim_in.mg_log = len > 0 ) ) { + if ( len > MAX_CSV_PATH_LENGTH ) { + std::cerr << "HPCG CSV file name is too long!" << std::endl; + std::exit( -1 ); + } + std::strncpy( sim_in.mg_csv.data(), mg_csv, MAX_CSV_PATH_LENGTH ); + } } From 74283e6209a71460df21f634240dd611010ab6c5 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 2 Mar 2023 15:26:46 +0100 Subject: [PATCH 21/28] renaming telemetry API elements --- .../multigrid/multigrid_building_utils.hpp | 14 +- .../algorithms/multigrid/multigrid_cg.hpp | 14 +- .../algorithms/multigrid/multigrid_data.hpp | 20 +- .../multigrid/multigrid_v_cycle.hpp | 8 +- .../multigrid/red_black_gauss_seidel.hpp | 12 +- .../multigrid/single_matrix_coarsener.hpp | 7 +- .../graphblas/utils/telemetry/CSVWriter.hpp | 145 ++++---- .../utils/telemetry/OutputStream.hpp | 28 +- .../graphblas/utils/telemetry/Stopwatch.hpp | 40 +-- .../graphblas/utils/telemetry/Telemetry.hpp | 2 +- .../utils/telemetry/TelemetryBase.hpp | 50 +-- .../utils/telemetry/TelemetryController.hpp | 319 ++++++++++++++++++ .../utils/telemetry/TelemetryToken.hpp | 145 -------- .../graphblas/utils/telemetry/Timeable.hpp | 18 +- tests/smoke/hpcg.cpp | 121 ++++--- 15 files changed, 539 insertions(+), 404 deletions(-) create mode 100644 include/graphblas/utils/telemetry/TelemetryController.hpp delete mode 100644 include/graphblas/utils/telemetry/TelemetryToken.hpp diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp index ad09f4c9f..f46b8e558 100644 --- 
a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -56,24 +56,26 @@ namespace grb { * the coarser system matrix (in this order) * @tparam SmootherInfoType type holding the information for the smoother; * its constructor must take in input the size of the system matrix for that level + * @tparam TelControllerType telemetry controller type, to (de)activate time measurement at compile-time * * @param mg_sizes sizes of the system matrix for each level of the multi-grid * @param system_levels system data (system matrix, residual, solution, ...) for each level * @param coarsener_levels at position \a i of this vector, data to coarsen from level \a i * (system size \p mg_sizes [i] ) to level \a i+1 (system size \p mg_sizes [i+1] ) * @param smoother_levels smoother data for each level + * @param tt telemetry controller to control time tracing */ template< typename MGInfoType, typename CoarsenerInfoType, typename SmootherInfoType, - typename TelTokenType + typename TelControllerType > void multigrid_allocate_data( - std::vector< std::unique_ptr< MGInfoType > > &system_levels, - std::vector< std::unique_ptr< CoarsenerInfoType > > &coarsener_levels, - std::vector< std::unique_ptr< SmootherInfoType > > &smoother_levels, - const std::vector< size_t > &mg_sizes, - const TelTokenType & tt + std::vector< std::unique_ptr< MGInfoType > > & system_levels, + std::vector< std::unique_ptr< CoarsenerInfoType > > & coarsener_levels, + std::vector< std::unique_ptr< SmootherInfoType > > & smoother_levels, + const std::vector< size_t > & mg_sizes, + const TelControllerType & tt ) { if( mg_sizes.size() == 0 ) { throw std::invalid_argument( "at least one size should be available" ); diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 2bb936a1c..3099e7d4e 100644 --- 
a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -113,10 +113,10 @@ namespace grb { template< typename MGCGTypes, typename MultiGridRunnerType, - typename TelTokenType, + typename TelControllerType, Descriptor descr = descriptors::no_operation, typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff - > struct MultiGridCGRunner : public grb::utils::telemetry::Timeable< TelTokenType > { + > struct MultiGridCGRunner : public grb::utils::telemetry::Timeable< TelControllerType > { using IOType = typename MGCGTypes::IOType; using NonzeroType = typename MGCGTypes::NonzeroType; @@ -152,10 +152,10 @@ namespace grb { * as the state of the MG runner is managed automatically with this object. */ MultiGridCGRunner( - const TelTokenType & tt, - MultiGridRunnerType &_mg_runner + const TelControllerType & tt, + MultiGridRunnerType & _mg_runner ) : - grb::utils::telemetry::Timeable< TelTokenType >( tt ), + grb::utils::telemetry::Timeable< TelControllerType >( tt ), mg_runner( _mg_runner ), dbg_logger() { @@ -163,11 +163,11 @@ namespace grb { } MultiGridCGRunner( - const TelTokenType & tt, + const TelControllerType & tt, MultiGridRunnerType & _mg_runner, DbgOutputStreamType & _dbg_logger ) : - grb::utils::telemetry::Timeable< TelTokenType >( tt ), + grb::utils::telemetry::Timeable< TelControllerType >( tt ), mg_runner( _mg_runner ), dbg_logger( _dbg_logger ) {} diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp index ed580da3d..67fe7bb8f 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_data.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -47,17 +47,19 @@ namespace grb { * @tparam IOType Type of values of the vectors for intermediate results * @tparam NonzeroType Type of the values stored inside the system matrix \p A * and the coarsening matrix #Ax_finer + * @tparam 
TelControllerType type of the controller for telemetry, to compile-time (de)activate + * the (mg_sm)_stopwatches */ template< typename IOType, typename NonzeroType, - typename TelTokenType + typename TelControllerType > struct MultiGridData { - grb::utils::telemetry::Stopwatch< TelTokenType > mg_stopwatch; - grb::utils::telemetry::Stopwatch< TelTokenType > sm_stopwatch; - const size_t level; ///< level of the grid (0 for the finest physical system) - const size_t system_size; ///< size of the system, i.e. side of the #A system matrix + grb::utils::telemetry::Stopwatch< TelControllerType > mg_stopwatch; ///< stopwatch to measure the execution time in MG + grb::utils::telemetry::Stopwatch< TelControllerType > sm_stopwatch; ///< stopwatch to measure the execution time in the smoother + const size_t level; ///< level of the grid (0 for the finest physical system) + const size_t system_size; ///< size of the system, i.e. side of the #A system matrix grb::Matrix< NonzeroType > A; ///< system matrix grb::Vector< IOType > z; ///< multi-grid solution grb::Vector< IOType > r; ///< residual @@ -66,7 +68,7 @@ namespace grb { * Construct a new multigrid data object from level information and system size. 
*/ MultiGridData( - const TelTokenType & _tt, + const TelControllerType & _tt, size_t _level, size_t sys_size ) : @@ -79,10 +81,10 @@ namespace grb { r( sys_size ) {} // for safety, disable copy semantics - MultiGridData( const MultiGridData< IOType, NonzeroType, TelTokenType > & o ) = delete; + MultiGridData( const MultiGridData< IOType, NonzeroType, TelControllerType > & o ) = delete; - MultiGridData & operator=( - const MultiGridData< IOType, NonzeroType, TelTokenType > & ) = delete; + MultiGridData< IOType, NonzeroType, TelControllerType > & operator=( + const MultiGridData< IOType, NonzeroType, TelControllerType > & ) = delete; grb::RC init_vectors( IOType zero ) { grb::RC rc = grb::set( z, zero ); diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 6ab53b469..dbe15d2b8 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -61,17 +61,19 @@ namespace grb { typename MGTypes, typename MGSmootherType, typename CoarsenerType, - typename TelTokenType, + typename TelControllerType, Descriptor descr = descriptors::no_operation, typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff > struct MultiGridRunner { - using self_t = MultiGridRunner< MGTypes, MGSmootherType, CoarsenerType, TelTokenType, descr >; + using self_t = MultiGridRunner< MGTypes, MGSmootherType, CoarsenerType, TelControllerType, descr >; + // algebraic types using IOType = typename MGTypes::IOType; using NonzeroType = typename MGTypes::NonzeroType; using Ring = typename MGTypes::Ring; using Minus = typename MGTypes::Minus; - using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; + // runners using SmootherRunnerType = MGSmootherType; using CoarsenerRunnerType = CoarsenerType; diff --git 
a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 02d0c5dd4..305fa30d7 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -66,22 +66,22 @@ namespace grb { * * It stores the information to smooth each level of the grid, to be initalized separately. * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam Ring the ring of algebraic operators + * @tparam SmootherTypes container of algebraic types for the smoother (IOType, NonzeroType, Ring) + * @tparam TelControllerType telemetry controller to (de)activate time tracing within passed MultiGridData objects * @tparam descr descriptors with statically-known data for computation and containers */ template < class SmootherTypes, - typename TelTokenType, + typename TelControllerType, Descriptor descr = descriptors::no_operation > struct RedBlackGSSmootherRunner { using IOType = typename SmootherTypes::IOType; using NonzeroType = typename SmootherTypes::NonzeroType; using Ring = typename SmootherTypes::Ring; - using SmootherInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; - using SmootherDataType = SmootherData< IOType >; + using Minus = typename SmootherTypes::Minus; + using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external input structure + using SmootherDataType = SmootherData< IOType >; ///< smoothing information and temporary variables (per MG level) size_t presmoother_steps = 1UL; ///< number of pre-smoother steps size_t postsmoother_steps = 1UL; ///< number of post-smoother steps diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index 3d1fee648..0e2ee58af 100644 --- 
a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -69,7 +69,7 @@ namespace grb { */ template< class CoarsenerTypes, - typename TelTokenType, + typename TelControllerType, Descriptor descr = descriptors::no_operation > struct SingleMatrixCoarsener { @@ -77,8 +77,9 @@ namespace grb { using NonzeroType = typename CoarsenerTypes::NonzeroType; using Ring = typename CoarsenerTypes::Ring; using Minus = typename CoarsenerTypes::Minus; - using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelTokenType >; - using CoarseningDataType = CoarseningData< IOType, NonzeroType >; + + using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< input data from MG + using CoarseningDataType = CoarseningData< IOType, NonzeroType >; ///< internal data with coarsening information static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); diff --git a/include/graphblas/utils/telemetry/CSVWriter.hpp b/include/graphblas/utils/telemetry/CSVWriter.hpp index 969b73be8..94a7111b6 100644 --- a/include/graphblas/utils/telemetry/CSVWriter.hpp +++ b/include/graphblas/utils/telemetry/CSVWriter.hpp @@ -23,15 +23,15 @@ #ifndef _H_GRB_UTILS_TELEMETRY_CSV_WRITER #define _H_GRB_UTILS_TELEMETRY_CSV_WRITER -#include -#include -#include -#include +#include #include #include #include -#include +#include +#include +#include #include +#include #include "TelemetryBase.hpp" @@ -41,46 +41,35 @@ namespace grb { static constexpr char STD_CSV_SEP = ','; - template< - typename TelTokenType, - bool enabled, - class T1, - class ...Ts - > class CSVWriter : public TelemetryBase< TelTokenType, enabled > { + template< typename TelControllerType, bool enabled, class T1, class... 
Ts > + class CSVWriter : public TelemetryBase< TelControllerType, enabled > { public: - template< class U, class ...Us > struct is_csv_printable { + template< class U, class... Us > + struct is_csv_printable { static constexpr bool value = std::is_arithmetic< U >::value; }; - template< class U1, class U2, class ...Us > struct is_csv_printable< U1, U2, Us...> { + template< class U1, class U2, class... Us > + struct is_csv_printable< U1, U2, Us... > { static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... >::value; }; static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); - using self_t = CSVWriter< TelTokenType, enabled, T1, Ts... >; + using self_t = CSVWriter< TelControllerType, enabled, T1, Ts... >; - using base_t = TelemetryBase< TelTokenType, enabled >; + using base_t = TelemetryBase< TelControllerType, enabled >; CSVWriter() = delete; - CSVWriter( - const TelTokenType & tt, - std::initializer_list< const char * > _headers, - char _separator, - size_t size - ) : - base_t( tt ) - { - ( void ) tt; - ( void ) _headers; - ( void ) _separator; - ( void ) size; + CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers, char _separator, size_t size ) : base_t( tt ) { + (void)tt; + (void)_headers; + (void)_separator; + (void)size; } - CSVWriter( const TelTokenType & tt, std::initializer_list< const char * > _headers ) : - CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) - {} + CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} CSVWriter( const self_t & ) = delete; @@ -90,8 +79,8 @@ namespace grb { self_t & operator=( self_t && ) = delete; - template< class... UTypes > void add_line( UTypes&&... ) { - } + template< class... UTypes > + void add_line( UTypes &&... 
) {} void clear() {} @@ -100,37 +89,37 @@ namespace grb { } // print nothing - char last_line() const { return '\0'; } + char last_line() const { + return '\0'; + } std::ostream & write_to_stream( std::ostream & stream ) const { return stream; } void write_to_file( const char * name ) const { - ( void ) name; + (void)name; } }; - - template< - typename TelTokenType, - class T1, - class ...Ts - > class CSVWriter< TelTokenType, true, T1, Ts... > : public TelemetryBase< TelTokenType, true > { + template< typename TelControllerType, class T1, class... Ts > + class CSVWriter< TelControllerType, true, T1, Ts... > : public TelemetryBase< TelControllerType, true > { public: - template< class U, class ...Us > struct is_csv_printable { + template< class U, class... Us > + struct is_csv_printable { static constexpr bool value = std::is_arithmetic< U >::value; }; - template< class U1, class U2, class ...Us > struct is_csv_printable< U1, U2, Us...> { + template< class U1, class U2, class... Us > + struct is_csv_printable< U1, U2, Us... > { static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... >::value; }; static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); - using self_t = CSVWriter< TelTokenType, true, T1, Ts... >; + using self_t = CSVWriter< TelControllerType, true, T1, Ts... 
>; - using base_t = TelemetryBase< TelTokenType, true >; + using base_t = TelemetryBase< TelControllerType, true >; class CSVLastTuple { public: @@ -138,10 +127,7 @@ namespace grb { CSVLastTuple( const CSVLastTuple & clt ) : csv( clt.csv ) {} - inline friend std::ostream & operator<<( - std::ostream & stream, - const CSVLastTuple & t - ) { + inline friend std::ostream & operator<<( std::ostream & stream, const CSVLastTuple & t ) { return t.csv.write_last_line_to_stream( stream ); } @@ -151,15 +137,7 @@ namespace grb { CSVWriter() = delete; - CSVWriter( - const TelTokenType & tt, - std::initializer_list< const char * > _headers, - char _separator, - size_t size - ) : - base_t( tt ), - separator( _separator ) - { + CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers, char _separator, size_t size ) : base_t( tt ), separator( _separator ) { if( _headers.size() != NUM_FIELDS ) { throw std::runtime_error( "wrong number of headers, it must match the unmber of line elements" ); } @@ -168,17 +146,15 @@ namespace grb { for( const auto & h : _headers ) { headers.emplace_back( h ); } - if ( !tt.is_active() ) { + if( ! tt.is_active() ) { return; } lines.reserve( size ); // zero to force physical allocation - //std::memset( reinterpret_cast< void * >( lines.data() ), 0, lines.size() * sizeof( tuple_t ) ); + // std::memset( reinterpret_cast< void * >( lines.data() ), 0, lines.size() * sizeof( tuple_t ) ); } - CSVWriter( const TelTokenType & tt, std::initializer_list< const char * > _headers ) : - CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) - {} + CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} CSVWriter( const self_t & ) = delete; @@ -188,9 +164,10 @@ namespace grb { self_t & operator=( self_t && ) = delete; - template< class... UTypes > void add_line( UTypes&&... vs ) { - if ( this->is_active() ) { - lines.emplace_back( std::forward( vs )... 
); + template< class... UTypes > + void add_line( UTypes &&... vs ) { + if( this->is_active() ) { + lines.emplace_back( std::forward< UTypes >( vs )... ); } } @@ -199,21 +176,21 @@ namespace grb { } std::ostream & write_last_line_to_stream( std::ostream & stream ) const { - if ( lines.size() > 0 && this->is_active() ) { + if( lines.size() > 0 && this->is_active() ) { write_line( stream, lines.back() ); } return stream; } CSVLastTuple last_line() const { - if ( lines.size() == 0 ) { + if( lines.size() == 0 ) { throw std::runtime_error( "no measures" ); } return CSVLastTuple( *this ); } std::ostream & write_to_stream( std::ostream & stream ) const { - if ( !this->is_active() ) { + if( ! this->is_active() ) { return stream; } write_header( stream ); @@ -226,11 +203,11 @@ namespace grb { } void write_to_file( const char * name ) const { - if ( !this->is_active() ) { + if( ! this->is_active() ) { return; } std::ofstream file( name ); - if( !file.is_open() ) { + if( ! file.is_open() ) { throw std::runtime_error( "cannot open file" ); } write_to_stream( file ); @@ -261,33 +238,25 @@ namespace grb { } // recursive case - template< size_t OFFS > inline void write_val( - std::ostream & stream, - typename std::enable_if< OFFS < NUM_FIELDS - 1, const tuple_t &>::type _tup - ) const { + template< size_t OFFS > + inline void write_val( std::ostream & stream, typename std::enable_if < OFFS< NUM_FIELDS - 1, const tuple_t & >::type _tup ) const { stream << std::get< OFFS >( _tup ) << separator; write_val< OFFS + 1 >( stream, _tup ); // tail recursion } // base case - template< size_t OFFS > inline void write_val( - std::ostream & stream, - typename std::enable_if< OFFS == NUM_FIELDS - 1, const tuple_t &>::type _tup - ) const { - (void) separator; + template< size_t OFFS > + inline void write_val( std::ostream & stream, typename std::enable_if< OFFS == NUM_FIELDS - 1, const tuple_t & >::type _tup ) const { + (void)separator; stream << std::get< OFFS >( _tup ); } - }; - template< - 
class T1, - class ...Ts - > using StaticCSVWriter = CSVWriter< TelemetryTokenAlwaysOn, true, T1, Ts... >; - - } - } -} + template< class T1, class... Ts > + using StaticCSVWriter = CSVWriter< TelemetryControllerAlwaysOn, true, T1, Ts... >; + } // namespace telemetry + } // namespace utils +} // namespace grb #endif // _H_GRB_UTILS_TELEMETRY_CSV_WRITER diff --git a/include/graphblas/utils/telemetry/OutputStream.hpp b/include/graphblas/utils/telemetry/OutputStream.hpp index 35622b11a..8ec0606d7 100644 --- a/include/graphblas/utils/telemetry/OutputStream.hpp +++ b/include/graphblas/utils/telemetry/OutputStream.hpp @@ -57,16 +57,16 @@ namespace grb { }; template< - typename TelTokenType, - bool enabled = TelTokenType::enabled - > class OutputStream : public TelemetryBase< TelTokenType, enabled > { + typename TelControllerType, + bool enabled = TelControllerType::enabled + > class OutputStream : public TelemetryBase< TelControllerType, enabled > { public: - using self_t = OutputStream< TelTokenType, enabled >; + using self_t = OutputStream< TelControllerType, enabled >; OutputStream() = default; - OutputStream( const TelTokenType & _tt, std::ostream & _out ) : - TelemetryBase< TelTokenType, enabled >( _tt ) + OutputStream( const TelControllerType & _tt, std::ostream & _out ) : + TelemetryBase< TelControllerType, enabled >( _tt ) { ( void ) _out; } @@ -96,15 +96,15 @@ namespace grb { } }; - template< typename TelTokenType > class OutputStream< TelTokenType, true > : - public TelemetryBase< TelTokenType, true > { + template< typename TelControllerType > class OutputStream< TelControllerType, true > : + public TelemetryBase< TelControllerType, true > { public: - using self_t = OutputStream< TelTokenType, true >; + using self_t = OutputStream< TelControllerType, true >; - using base_t = TelemetryBase< TelTokenType, true >; + using base_t = TelemetryBase< TelControllerType, true >; - OutputStream( const TelTokenType & _tt, std::ostream & _out ) : - TelemetryBase< 
TelTokenType, true >( _tt ), + OutputStream( const TelControllerType & _tt, std::ostream & _out ) : + TelemetryBase< TelControllerType, true >( _tt ), out( _out ) {} @@ -142,9 +142,9 @@ namespace grb { std::ostream & out; }; - using OutputStreamOff = OutputStream< TelemetryTokenAlwaysOff, false >; + using OutputStreamOff = OutputStream< TelemetryControllerAlwaysOff, false >; - using OutputStreamOn = OutputStream< TelemetryTokenAlwaysOn, true >; + using OutputStreamOn = OutputStream< TelemetryControllerAlwaysOn, true >; } } } diff --git a/include/graphblas/utils/telemetry/Stopwatch.hpp b/include/graphblas/utils/telemetry/Stopwatch.hpp index 2cc900b61..1faa2e186 100644 --- a/include/graphblas/utils/telemetry/Stopwatch.hpp +++ b/include/graphblas/utils/telemetry/Stopwatch.hpp @@ -48,19 +48,12 @@ namespace grb { static inline duration_float_t nano2Sec( duration_nano_t nano ) { return static_cast< duration_float_t >( nano ) / 1000000000UL; } - }; - template< - typename TelTokenType, - bool enabled = TelTokenType::enabled - > class Stopwatch: - public StopwatchBase, public TelemetryBase< TelTokenType, enabled > { + template< typename TelControllerType, bool enabled = TelControllerType::enabled > + class Stopwatch : public StopwatchBase, public TelemetryBase< TelControllerType, enabled > { public: - Stopwatch( const TelTokenType & tt ) : - StopwatchBase(), - TelemetryBase< TelTokenType, enabled >( tt ) - {} + Stopwatch( const TelControllerType & tt ) : StopwatchBase(), TelemetryBase< TelControllerType, enabled >( tt ) {} Stopwatch( const Stopwatch & ) = default; @@ -79,11 +72,8 @@ namespace grb { } }; - - template< - typename TelTokenType - > class Stopwatch< TelTokenType, true >: - public StopwatchBase, public TelemetryBase< TelTokenType, true > { + template< typename TelControllerType > + class Stopwatch< TelControllerType, true > : public StopwatchBase, public TelemetryBase< TelControllerType, true > { typedef typename std::chrono::high_resolution_clock clock_t; @@ 
-96,23 +86,19 @@ namespace grb { time_point_t beginning; public: - Stopwatch( const TelTokenType & tt ) : - StopwatchBase(), - TelemetryBase< TelTokenType, true >( tt ), - elapsedTime( duration_t::zero() ) - {} + Stopwatch( const TelControllerType & tt ) : StopwatchBase(), TelemetryBase< TelControllerType, true >( tt ), elapsedTime( duration_t::zero() ) {} Stopwatch( const Stopwatch & s ) = default; inline void start() { - if ( this->is_active() ) { + if( this->is_active() ) { beginning = clock_t::now(); } } inline duration_nano_t stop() { duration_nano_t count = 0; - if ( this->is_active() ) { + if( this->is_active() ) { time_point_t end = clock_t::now(); duration_t d = end - beginning; count = d.count(); @@ -123,7 +109,7 @@ namespace grb { inline duration_nano_t reset() { duration_t r = duration_t::zero(); - if ( this->is_active() ) { + if( this->is_active() ) { r = elapsedTime; elapsedTime = duration_t::zero(); } @@ -135,9 +121,9 @@ namespace grb { } }; - using StaticStopwatch = Stopwatch< TelemetryTokenAlwaysOn, true >; - } - } -} + using StaticStopwatch = Stopwatch< TelemetryControllerAlwaysOn, true >; + } // namespace telemetry + } // namespace utils +} // namespace grb #endif // _H_GRB_UTILS_TELEMETRY_STOPWATCH diff --git a/include/graphblas/utils/telemetry/Telemetry.hpp b/include/graphblas/utils/telemetry/Telemetry.hpp index f8369d1d1..0bb35909b 100644 --- a/include/graphblas/utils/telemetry/Telemetry.hpp +++ b/include/graphblas/utils/telemetry/Telemetry.hpp @@ -23,7 +23,7 @@ #ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY #define _H_GRB_UTILS_TELEMETRY_TELEMETRY -#include "TelemetryToken.hpp" +#include "TelemetryController.hpp" #include "Stopwatch.hpp" #include "Timeable.hpp" #include "CSVWriter.hpp" diff --git a/include/graphblas/utils/telemetry/TelemetryBase.hpp b/include/graphblas/utils/telemetry/TelemetryBase.hpp index 969f93213..fcb9f5105 100644 --- a/include/graphblas/utils/telemetry/TelemetryBase.hpp +++ 
b/include/graphblas/utils/telemetry/TelemetryBase.hpp @@ -17,31 +17,37 @@ /* * @author Alberto Scolari - * @date 14th February, 2023 + * @date 1st March, 2023 */ #ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE #define _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE -#include "TelemetryToken.hpp" +#include "TelemetryController.hpp" namespace grb { namespace utils { namespace telemetry { + /** + * + * + * @tparam TelControllerType + * @tparam enabled + */ template< - typename TelTokenType, - bool enabled = TelTokenType::enabled + typename TelControllerType, + bool enabled = TelControllerType::enabled > class TelemetryBase { public: - static_assert( is_telemetry_token< TelTokenType >::value, - "type TelTokenType does not implement Telemetry Token interface" ); + static_assert( is_telemetry_controller< TelControllerType >::value, + "type TelControllerType does not implement Telemetry Controller interface" ); - using self_t = TelemetryBase< TelTokenType, enabled >; + using self_t = TelemetryBase< TelControllerType, enabled >; TelemetryBase() = default; - TelemetryBase( const TelTokenType & tt ) { + TelemetryBase( const TelControllerType & tt ) { ( void ) tt; } @@ -54,35 +60,35 @@ namespace grb { template< - typename TelTokenType - > class TelemetryBase< TelTokenType, true > { + typename TelControllerType + > class TelemetryBase< TelControllerType, true > { - const TelTokenType & telemetry_token; + const TelControllerType & telemetry_Controller; public: - static_assert( is_telemetry_token< TelTokenType >::value, - "type TelTokenType does not implement Telemetry Token interface" ); + static_assert( is_telemetry_controller< TelControllerType >::value, + "type TelControllerType does not implement Telemetry Controller interface" ); - using self_t = TelemetryBase< TelTokenType, true >; + using self_t = TelemetryBase< TelControllerType, true >; - TelemetryBase( const TelTokenType & tt ): telemetry_token( tt ) {} + TelemetryBase( const TelControllerType & tt ): telemetry_Controller( tt ) 
{} - TelemetryBase( const self_t & tb ) : telemetry_token( tb.telemetry_token ) {} + TelemetryBase( const self_t & tb ) : telemetry_Controller( tb.telemetry_Controller ) {} self_t & operator=( const self_t & ) = delete; - bool is_active() const { return telemetry_token.is_active(); } + bool is_active() const { return telemetry_Controller.is_active(); } }; // always actibe base, especially for prototyping scenarios - template<> class TelemetryBase< TelemetryTokenAlwaysOn, true > { + template<> class TelemetryBase< TelemetryControllerAlwaysOn, true > { public: - static_assert( is_telemetry_token< TelemetryTokenAlwaysOn >::value, - "type TelTokenType does not implement Telemetry Token interface" ); + static_assert( is_telemetry_controller< TelemetryControllerAlwaysOn >::value, + "type TelControllerType does not implement Telemetry Controller interface" ); - using self_t = TelemetryBase< TelemetryTokenAlwaysOn, true >; + using self_t = TelemetryBase< TelemetryControllerAlwaysOn, true >; - TelemetryBase( const TelemetryTokenAlwaysOn & tt ) { (void) tt; } + TelemetryBase( const TelemetryControllerAlwaysOn & tt ) { (void) tt; } TelemetryBase( const self_t & tb ) = default; diff --git a/include/graphblas/utils/telemetry/TelemetryController.hpp b/include/graphblas/utils/telemetry/TelemetryController.hpp new file mode 100644 index 000000000..63a013eab --- /dev/null +++ b/include/graphblas/utils/telemetry/TelemetryController.hpp @@ -0,0 +1,319 @@ + +/* + * Copyright 2023 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @author Alberto Scolari + * @date 1st March, 2023 + * + * This file defines the basic functionalities for Telemetry Controllers, i.e., + * objects that enable/disable telemetry at compile-time and runtime. + * + * A telemetry controller can be \b enabled (at compile-time) to produce the code for telemetry and must be + * \b activated at runtime to emit actual telemetry information. Activation depends on runtime information + * (e.g., user's input) and may change dynamically \a after the controller is instantiated. + * If a controller is \b disabled, no code for compile-time is generated in any compliant telemetry functionality; + * hence, any (de)activation of a disabled telemetry controller is simply ignored and produces no result. + * In any case, the code must compile under all conditions, in order to avoid verbose + * pre-processing \a #if conditions. + * + * A typical instantiation of a telemetry controller in a user's application looks as follows: + * + * \code{.cpp} + * ENABLE_TELEMETRY_CONTROLLER( my_controller_t ) + * DEFINE_TELEMETRY_CONTROLLER( my_controller_t ) + * + * int main() { + * my_controller_t my_controller( true ); + * if( my_controller.is_active() ) { + * std::cout << "my_controller is active"; + * } else { + * std::cout << "my_controller is NOT active"; + * if( !my_controller_t::enabled ) { + * std::cout << ", because it was deactivated at compile-time"; + * } + * } + * std::cout << std::endl; + * return 0; + * } + * \endcode + * + * where the activation directive \a ENABLE_TELEMETRY_CONTROLLER is present only if the controller + * is to be activated. 
Users should indeed comment/uncomment this directive to disable/enable telemetry + * while debugging, or may add extra pre-processing logic to control it during compilation, like + * + * \code{.cpp} + * #ifdef __I_WANT_my_controller_t_ENABLED__ + * ENABLE_TELEMETRY_CONTROLLER( my_controller_t ) + * #endif + * DEFINE_TELEMETRY_CONTROLLER( my_controller_t ) + * \endcode + * + * Note that the \a ENABLE_TELEMETRY_CONTROLLER directive (if present) must come \b before the + * \a DEFINE_TELEMETRY_CONTROLLER directive, otherwise compilation errors occur. + */ + +#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER +#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER + +#include <type_traits> +#include <utility> // std::declval< T >() + +namespace grb { + namespace utils { + namespace telemetry { + + /** + * Returns whether a telemetry controller is enabled at compile-time. By default + * it is \b not. + * + * @tparam T type associated to the telemetry controller + * @return true never + * @return false always + */ + template< typename T > constexpr bool is_controller_enabled() { return false; } + + /** + * Class that encapsulates the logic to enable/disable telemetry at compile-time + * or at runtime. + * + * Telemetry can be completely disabled at compile-time (e.g., to avoid any code generation + * and overhead) or can be controlled at runtime, based on external conditions (e.g., + * user's input, cluster node number, ...). + * + * In the following, the field #enabled encodes the compile-time information, while + * the field \a active (if present) and the corresponding getter #is_active() tell + * whether the controller is \a active at runtime. Hence, users of telemetry controllers should always + * use the #is_active() method to check whether telemetry is active, while implementations + * of telemetry controllers should implement this method also based on the value of the #enabled + * field, possibly "short-circuiting" when #enabled is \a false.
This implementation does + * exactly this, disabling telemetry at compile-time and ignoring any runtime information. + * + * @tparam en whether telemetry is enabled (\p en = \a true has a dedicated template specialization) + */ + template< bool en > class TelemetryControllerBase { + public: + using self_t = TelemetryControllerBase< en >; + + /** + * Construct a new Telemetry Controller Base object with runtime information. + * + * Here, runtime information is ignored, as this implementation disables any telemetry. + * + * @param _enabled whether telemetry is runtime-enabled (ignored here) + */ + TelemetryControllerBase( bool _enabled ) { + (void) _enabled; + } + + TelemetryControllerBase() = delete; + + TelemetryControllerBase( const self_t & ) = delete; + + TelemetryControllerBase& operator=( const self_t & ) = delete; + + /** + * Whether telemetry is runtime-active. + * + * @return true never here + * @return false always + */ + constexpr bool inline is_active() const { return false; } + + /** + * Set the active status of the telemetry controller. + * + * This \a disabled implementation ignores the input \p _active. + */ + void inline set_active( bool _active ) { + ( void ) _active; + } + + /** + * Whether telemetry is compile-time active (never here). + */ + static constexpr bool enabled = false; + }; + + /** + * Convenience definition of an always-off telemetry controller. + */ + using TelemetryControllerAlwaysOff = TelemetryControllerBase< false >; + + /** + * Template specialization for compile-time enabled telemetry, which + * can be controlled at runtime. + * + * The controller is \b enabled by default, and its \a active status can be controlled + * at runtime via the constructor and the #set_active(bool) method. + */ + template<> class TelemetryControllerBase< true > { + public: + using self_t = TelemetryControllerBase< true >; + + /** + * Construct a new Telemetry Controller Base object, specifying the \a active state.
+ * + * @param _active whether the controller is \a active or not + */ + TelemetryControllerBase( bool _active ) : active( _active ) {} + + TelemetryControllerBase() = delete; + + TelemetryControllerBase( const self_t & ) = default; + + TelemetryControllerBase& operator=( const self_t & ) = delete; + + /** + * Tells whether the controller is \a active. + */ + bool is_active() const { return this->active; } + + /** + * Set the \a active status of the controller at runtime. + * + * @param _active whether to activate the controller + */ + void inline set_active( bool _active ) { + this->active = _active; + } + + /** + * Whether telemetry is compile-time active (here always). + */ + static constexpr bool enabled = true; + + protected: + bool active; + }; + + /** + * Always active controller, useful especially for prototyping scenarios. + */ + class TelemetryControllerAlwaysOn { + public: + TelemetryControllerAlwaysOn( bool _enabled ) { + (void) _enabled; + } + + TelemetryControllerAlwaysOn() = default; + + TelemetryControllerAlwaysOn( const TelemetryControllerAlwaysOn & ) = default; + + TelemetryControllerAlwaysOn& operator=( const TelemetryControllerAlwaysOn & ) = delete; + + /** + * Tells whether the controller is \a active, which is in this case always true. + */ + constexpr bool is_active() const { return true; } + + /** + * Set the active status of the telemetry controller. + * + * This \a always-on implementation ignores the input \p _active. + */ + void inline set_active( bool _active ) { + ( void ) _active; + } + + /** + * Whether telemetry is compile-time active (here always). + */ + static constexpr bool enabled = true; + }; + + /** + * SFINAE-based structure to check whether \p T is a telemetry controller, i.e.
+ * - it has a \a constexpr static field named \a enabled + * - it has an \a is_active() method + * - it has a \a set_active(bool) method + */ + template< typename T > struct is_telemetry_controller { + private: + template< typename U > static constexpr bool has_enabled_field( + typename std::enable_if< + std::is_same< typename std::decay< decltype( U::enabled ) >::type, bool >::value, + bool * >::type + ) { + return true; + } + + template< typename U > static constexpr bool has_enabled_field( ... ) { return false; } + + template< typename U > static constexpr bool has_is_active_method( + typename std::enable_if< + std::is_same< typename std::decay< decltype( std::declval< U >().is_active() ) + >::type, bool >::value, bool * >::type + ) { + return true; + } + + template< typename U > static constexpr bool has_is_active_method( ... ) { return false; } + + template< typename U > static constexpr bool has_set_active_method( + typename std::enable_if< + std::is_same< decltype( std::declval< U >().set_active( true ) ), void >::value, + bool * >::type + ) { + return true; + } + + template< typename U > static constexpr bool has_set_active_method( ... ) { return false; } + + public: + static constexpr bool value = has_enabled_field< T >( nullptr ) + && has_is_active_method< T >( nullptr ) && has_set_active_method< T >( nullptr ) ; + }; + } + + } +} + +// Name of the Controller Enabler, i.e., a type that controls whether a telemetry controller is enabled +#define __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) __ ## name ## _Enabler + +// Name of the Telemetry Controller type +#define __TELEMETRY_CONTROLLER_NAME( name ) name ## _cls + +/** + * Defines a telemetry controller, i.e., a custom type derived from TelemetryControllerBase. + * + * This declaration requires the declaration of an associated controller enabler type, which controls + * whether the controller is enabled at compile-time; the controller is by default \b disabled.
+ */ +#define DEFINE_TELEMETRY_CONTROLLER( name ) \ + class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) {}; \ + using name = class __TELEMETRY_CONTROLLER_NAME( name ) : \ + public grb::utils::telemetry::TelemetryControllerBase< \ + grb::utils::telemetry::is_controller_enabled< __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() > { \ + public: \ + using base_t = grb::utils::telemetry::TelemetryControllerBase< \ + grb::utils::telemetry::is_controller_enabled< __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() >; \ + __TELEMETRY_CONTROLLER_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \ + }; + +/** + * Enables a telemetry controller through its associated enabler type. + * + * Once enabled, it can be runtime activated. + */ +#define ENABLE_TELEMETRY_CONTROLLER( name ) class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ); \ + namespace grb { namespace utils { namespace telemetry { \ + template<> constexpr bool is_controller_enabled< \ + __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() { return true; } \ + } } } + +#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER diff --git a/include/graphblas/utils/telemetry/TelemetryToken.hpp b/include/graphblas/utils/telemetry/TelemetryToken.hpp deleted file mode 100644 index dabac3c2e..000000000 --- a/include/graphblas/utils/telemetry/TelemetryToken.hpp +++ /dev/null @@ -1,145 +0,0 @@ - -/* - * Copyright 2023 Huawei Technologies Co., Ltd. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * @author Alberto Scolari - * @date 14th February, 2023 - */ - -#ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN -#define _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN - -#include -#include - -namespace grb { - namespace utils { - namespace telemetry { - - template< typename T > constexpr bool is_token_enabled() { return false; } - - // OFF - template< bool en > class TelemetryTokenBase { - public: - using self_t = TelemetryTokenBase< en >; - - TelemetryTokenBase( bool _enabled ) { - (void) _enabled; - } - - TelemetryTokenBase() = delete; - - TelemetryTokenBase( const self_t & ) = delete; - - TelemetryTokenBase& operator=( const self_t & ) = delete; - - constexpr bool is_active() const { return false; } - - static constexpr bool enabled = false; - }; - - using TelemetryTokenAlwaysOff = TelemetryTokenBase< false >; - - template<> class TelemetryTokenBase< true > { - public: - using self_t = TelemetryTokenBase< true >; - - TelemetryTokenBase( bool _active ) : active( _active ) {} - - TelemetryTokenBase() = delete; - - TelemetryTokenBase( const self_t & ) = delete; - - TelemetryTokenBase& operator=( const self_t & ) = delete; - - bool is_active() const { return this->active; } - - static constexpr bool enabled = true; - - protected: - const bool active; - }; - - // always active token, especially for prototyping scenarios - class TelemetryTokenAlwaysOn { - public: - TelemetryTokenAlwaysOn( bool _enabled ) { - (void) _enabled; - } - - TelemetryTokenAlwaysOn() = delete; - - TelemetryTokenAlwaysOn( const TelemetryTokenAlwaysOn & ) = delete; - - TelemetryTokenAlwaysOn& operator=( const TelemetryTokenAlwaysOn & ) = delete; - - constexpr bool is_active() const { return true; } - - static constexpr bool enabled = true; - }; - - - template< typename T > struct is_telemetry_token { - private: - template< typename U > static constexpr bool has_enabled_field( - typename std::enable_if< - std::is_same< typename std::decay< decltype( U::enabled ) >::type, bool >::value, - bool 
* >::type - ) { - return true; - } - - template< typename U > static constexpr bool has_enabled_field( ... ) { return false; } - - template< typename U > static constexpr bool has_is_active_method( - typename std::enable_if< - std::is_same< typename std::decay< decltype( std::declval< U >().is_active() ) >::type, bool >::value, - bool * >::type - ) { - return true; - } - - template< typename U > static constexpr bool has_is_active_method( ... ) { return false; } - - public: - static constexpr bool value = has_enabled_field< T >( nullptr ) && has_is_active_method< T >( nullptr ); - }; - } - - } -} - -#define __TELEMETRY_TOKEN_ENABLER_NAME( name ) __ ## name ## Enabler -#define __TELEMETRY_TOKEN_NAME( name ) name - -#define DECLARE_TELEMETRY_TOKEN( name ) \ - class __TELEMETRY_TOKEN_ENABLER_NAME( name ) {}; \ - template< typename T > class __TELEMETRY_TOKEN_NAME( name ) : \ - public grb::utils::telemetry::TelemetryTokenBase< grb::utils::telemetry::is_token_enabled< T >() > { \ - public: \ - using base_t = grb::utils::telemetry::TelemetryTokenBase< grb::utils::telemetry::is_token_enabled< T >() >; \ - __TELEMETRY_TOKEN_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \ - }; - - -#define ACTIVATE_TOKEN( name ) namespace grb { namespace utils { namespace telemetry { \ - template<> constexpr bool is_token_enabled< __TELEMETRY_TOKEN_ENABLER_NAME( name ) >() { return true; } \ -} } } - -#define TELEMETRY_TOKEN_TYPE( name ) __TELEMETRY_TOKEN_NAME( name )< __TELEMETRY_TOKEN_ENABLER_NAME( name ) > - -#endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_TOKEN diff --git a/include/graphblas/utils/telemetry/Timeable.hpp b/include/graphblas/utils/telemetry/Timeable.hpp index 02dd85b9e..95d1bdfa2 100644 --- a/include/graphblas/utils/telemetry/Timeable.hpp +++ b/include/graphblas/utils/telemetry/Timeable.hpp @@ -30,13 +30,13 @@ namespace grb { namespace telemetry { template< - typename TelTokenType, - bool enabled = TelTokenType::enabled + typename TelControllerType, + bool enabled = 
TelControllerType::enabled > class Timeable { public: - using self_t = Timeable< TelTokenType, enabled >; + using self_t = Timeable< TelControllerType, enabled >; - Timeable( const TelTokenType & tt ) { + Timeable( const TelControllerType & tt ) { (void) tt; } @@ -61,11 +61,11 @@ namespace grb { }; - template< typename TelTokenType > class Timeable< TelTokenType, true > { + template< typename TelControllerType > class Timeable< TelControllerType, true > { public: - using self_t = Timeable< TelTokenType, true >; + using self_t = Timeable< TelControllerType, true >; - Timeable( const TelTokenType & tt ) : swatch( tt ) {} + Timeable( const TelControllerType & tt ) : swatch( tt ) {} Timeable( const self_t & ) = default; @@ -89,10 +89,10 @@ namespace grb { } private: - Stopwatch< TelTokenType > swatch; + Stopwatch< TelControllerType > swatch; }; - using StaticTimeable = Timeable< TelemetryTokenAlwaysOn, true >; + using StaticTimeable = Timeable< TelemetryControllerAlwaysOn, true >; } } diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index adba0339a..07c38cc99 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -99,39 +99,43 @@ using coord_t = size_t; constexpr Descriptor hpcg_desc = descriptors::dense; -DECLARE_TELEMETRY_TOKEN( DistOut ) -ACTIVATE_TOKEN( DistOut ) -using dist_token_t = TELEMETRY_TOKEN_TYPE( DistOut ); -using DistStream = grb::utils::telemetry::OutputStream< dist_token_t >; +// telemetry control: controllers and output stream types for telemetry +// they can be (de)activated at compile-time by (un)commenting the respective ENABLE_TELEMETRY_CONTROLLER() macro +ENABLE_TELEMETRY_CONTROLLER( dist_controller_t ) +DEFINE_TELEMETRY_CONTROLLER( dist_controller_t ) +using DistStream = grb::utils::telemetry::OutputStream< dist_controller_t >; -DECLARE_TELEMETRY_TOKEN( HPCGTelemetry ) -ACTIVATE_TOKEN( HPCGTelemetry ) -using hpcg_token_t = TELEMETRY_TOKEN_TYPE( HPCGTelemetry ); +ENABLE_TELEMETRY_CONTROLLER( hpcg_controller_t ) 
+DEFINE_TELEMETRY_CONTROLLER( hpcg_controller_t ) -DECLARE_TELEMETRY_TOKEN( MGTelemetry ) -ACTIVATE_TOKEN( MGTelemetry ) -using mg_token_t = TELEMETRY_TOKEN_TYPE( MGTelemetry ); +ENABLE_TELEMETRY_CONTROLLER( mg_controller_t ) +DEFINE_TELEMETRY_CONTROLLER( mg_controller_t ) -DECLARE_TELEMETRY_TOKEN( DBGToken ) -// ACTIVATE_TOKEN( DBGToken ) -using dbg_token_t = TELEMETRY_TOKEN_TYPE( DBGToken ); -using DBGStream = grb::utils::telemetry::OutputStream< dbg_token_t >; +// ENABLE_TELEMETRY_CONTROLLER( dbg_controller_t ) +DEFINE_TELEMETRY_CONTROLLER( dbg_controller_t ) +using DBGStream = grb::utils::telemetry::OutputStream< dbg_controller_t >; using duration_t = utils::telemetry::duration_nano_t; -using hpcg_csv_t = utils::telemetry::CSVWriter< hpcg_token_t, hpcg_token_t::enabled, size_t, duration_t >; -using mg_csv_t = utils::telemetry::CSVWriter< mg_token_t, mg_token_t::enabled, size_t, size_t, duration_t, duration_t >; +using hpcg_csv_t = utils::telemetry::CSVWriter< hpcg_controller_t, hpcg_controller_t::enabled, + size_t, duration_t >; +using mg_csv_t = utils::telemetry::CSVWriter< mg_controller_t, mg_controller_t::enabled, + size_t, size_t, duration_t, duration_t >; // assembled types for simulation runners and input/output structures -using smoother_runner_t = grb::algorithms::RedBlackGSSmootherRunner< HPCGTypes, mg_token_t, hpcg_desc >; +using smoother_runner_t = grb::algorithms::RedBlackGSSmootherRunner< HPCGTypes, + mg_controller_t, hpcg_desc >; using smoothing_data_t = typename smoother_runner_t::SmootherDataType; -using coarsener_runner_t = grb::algorithms::SingleMatrixCoarsener< HPCGTypes, mg_token_t, hpcg_desc >; +using coarsener_runner_t = grb::algorithms::SingleMatrixCoarsener< HPCGTypes, + mg_controller_t, hpcg_desc >; using coarsening_data_t = typename coarsener_runner_t::CoarseningDataType; -using mg_runner_t = MultiGridRunner< HPCGTypes, smoother_runner_t, coarsener_runner_t, mg_token_t, hpcg_desc, DBGStream >; +using mg_runner_t = MultiGridRunner< 
HPCGTypes, smoother_runner_t, coarsener_runner_t, + mg_controller_t, hpcg_desc, DBGStream >; using mg_data_t = typename mg_runner_t::MultiGridInputType; -using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_token_t, hpcg_desc, DBGStream >; +using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_controller_t, + hpcg_desc, DBGStream >; using hpcg_data_t = typename hpcg_runner_t::HPCGInputType; struct dotter : grb::utils::telemetry::OutputStreamLazy { @@ -219,32 +223,12 @@ static void print_system( * This routine is algorithm-agnositc, as long as the constructors of the data types meet the requirements * explained in \ref multigrid_allocate_data(). */ -template< typename T > T static next_pow_2( T n ) { - static_assert( std::is_integral< T >::value, "Integral required." ); - --n; - n |= ( n >> 1 ); - for( unsigned i = 1; i <= sizeof( T ) * 4; i *= 2 ) { - const unsigned shift = static_cast< T >( 1U ) << i; - n |= ( n >> shift ); - } - return n + 1; -} - -/** - * Allocates the data structure input to the various simulation steps (CG, multi-grid, coarsening, smoothing) - * for each level of the multi-grid. The input is the vector of system sizes \p mg_sizes, with sizes in - * monotonically \b decreasing order (finest system first). - * - * This routine is algorithm-agnositc, as long as the constructors of the data types meet the requirements - * explained in \ref multigrid_allocate_data(). 
- */ -static void allocate_system_structures( - std::vector< std::unique_ptr< mg_data_t > > &system_levels, - std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, - std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &cg_system_data, - const std::vector< size_t > &mg_sizes, - const mg_token_t & mg_token, +static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t > > & system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, + std::unique_ptr< hpcg_data_t > & cg_system_data, + const std::vector< size_t > & mg_sizes, + const mg_controller_t & mg_controller, DistStream & logger ) { grb::utils::Timer timer; @@ -253,7 +237,7 @@ static void allocate_system_structures( cg_system_data = std::unique_ptr< hpcg_data_t >( data ); logger << "allocating data for the MultiGrid simulation..."; timer.reset(); - multigrid_allocate_data( system_levels, coarsener_levels, smoother_levels, mg_sizes, mg_token ); + multigrid_allocate_data( system_levels, coarsener_levels, smoother_levels, mg_sizes, mg_controller ); double time = timer.time(); logger << " time (ms) " << time << std::endl; @@ -283,7 +267,7 @@ static void build_3d_system( std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, std::unique_ptr< hpcg_data_t > &cg_system_data, const simulation_input & in, - const mg_token_t & tt, + const mg_controller_t & tt, DistStream & logger ) { constexpr size_t DIMS = 3; @@ -364,13 +348,19 @@ void grbProgram( const simulation_input & in, struct output & out ) { const size_t pid = spmd<>::pid(); grb::utils::Timer timer; - dist_token_t dist( pid == 0 ); - class MyNumPunct : public std::numpunct { - // protected: - char do_thousands_sep() const override { return '\''; } - std::string do_grouping() const override { return "\03"; } + // standard logger: active only on master node + 
dist_controller_t dist( pid == 0 ); + // separate thousands when printing integers + class IntegerSeparation : public std::numpunct< char > { + // protected: + char do_thousands_sep() const override { + return '\''; + } + std::string do_grouping() const override { + return "\03"; + } }; - std::locale old_locale = std::cout.imbue( std::locale( std::cout.getloc(), new MyNumPunct ) ); + std::locale old_locale = std::cout.imbue( std::locale( std::cout.getloc(), new IntegerSeparation ) ); DistStream logger( dist, std::cout ); logger << "beginning input generation..." << std::endl; @@ -378,13 +368,14 @@ void grbProgram( const simulation_input & in, struct output & out ) { // wrap hpcg_data inside a unique_ptr to forget about cleaning chores std::unique_ptr< hpcg_data_t > hpcg_state; - // log HPCG by default on master - hpcg_token_t hpcg_token( pid == 0 ); - // log Mg and smoother only if the user requested it - mg_token_t mg_token( pid == 0 && in.mg_log ); + // measure HPCG execution time by default on master + hpcg_controller_t hpcg_controller( pid == 0 ); + // measure MG and smoother only if the user requested it + mg_controller_t mg_controller( pid == 0 && in.mg_log ); - dbg_token_t dbg_token( pid == 0 ); - DBGStream dbg_stream( dbg_token, std::cout ); + // trace execution of CG and MG only on master + dbg_controller_t dbg_controller( pid == 0 ); + DBGStream dbg_stream( dbg_controller, std::cout ); // define the main HPCG runner and initialize the options of its components coarsener_runner_t coarsener; @@ -392,13 +383,14 @@ void grbProgram( const simulation_input & in, struct output & out ) { smoother.presmoother_steps = smoother.postsmoother_steps = in.smoother_steps; smoother.non_recursive_smooth_steps = 1UL; mg_runner_t mg_runner( smoother, coarsener, dbg_stream ); - hpcg_runner_t hpcg_runner( hpcg_token, mg_runner, dbg_stream ); + hpcg_runner_t hpcg_runner( hpcg_controller, mg_runner, dbg_stream ); hpcg_runner.tolerance = residual_zero; 
hpcg_runner.with_preconditioning = ! in.no_preconditioning; timer.reset(); // build the entire multi-grid system - build_3d_system( mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state, in, mg_token, logger ); + build_3d_system( mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, + hpcg_state, in, mg_controller, logger ); double input_duration = timer.time(); logger << "input generation time (ms): " << input_duration << std::endl; @@ -445,8 +437,9 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.inner_test_repetitions = 0; out.times.useful = 0.0; - hpcg_csv_t hpcg_csv( hpcg_token, { "repetition", "time" } ); - mg_csv_t mg_csv( mg_token, { "repetition", "level", "mg time", "smoother time" } ); + // initialize CSV writers (if activated) + hpcg_csv_t hpcg_csv( hpcg_controller, { "repetition", "time" } ); + mg_csv_t mg_csv( mg_controller, { "repetition", "level", "mg time", "smoother time" } ); // do benchmark for( size_t i = 0; i < in.inner_test_repetitions; ++i ) { From d10adaca43f212665856bac2e55302e3663873af Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 23 Feb 2023 17:54:16 +0100 Subject: [PATCH 22/28] polishing a few telemetry functionalities, linting added code, documenting telemetry --- .../algorithms/hpcg/average_coarsener.hpp | 68 ++--- .../algorithms/hpcg/greedy_coloring.hpp | 33 ++- .../hpcg/single_point_coarsener.hpp | 61 ++--- .../algorithms/hpcg/system_builder.hpp | 23 +- .../algorithms/hpcg/system_building_utils.hpp | 146 +++++------ .../multigrid/multigrid_building_utils.hpp | 6 +- .../algorithms/multigrid/multigrid_cg.hpp | 56 ++-- .../algorithms/multigrid/multigrid_data.hpp | 8 +- .../multigrid/multigrid_v_cycle.hpp | 73 ++---- .../multigrid/red_black_gauss_seidel.hpp | 66 +++-- .../multigrid/single_matrix_coarsener.hpp | 30 ++- .../utils/iterators/IteratorValueAdaptor.hpp | 6 +- .../utils/multigrid/array_vector_storage.hpp | 23 +- 
.../multigrid/dynamic_vector_storage.hpp | 42 ++- .../halo_matrix_generator_iterator.hpp | 39 +-- .../linearized_halo_ndim_iterator.hpp | 77 +++--- .../multigrid/linearized_halo_ndim_system.hpp | 166 ++++++------ .../multigrid/linearized_ndim_iterator.hpp | 57 ++-- .../multigrid/linearized_ndim_system.hpp | 86 +++--- .../graphblas/utils/multigrid/ndim_system.hpp | 28 +- .../graphblas/utils/multigrid/ndim_vector.hpp | 46 ++-- .../graphblas/utils/telemetry/CSVWriter.hpp | 248 ++++++++++++------ .../utils/telemetry/OutputStream.hpp | 211 +++++++++++---- .../graphblas/utils/telemetry/Stopwatch.hpp | 186 ++++++++++--- .../graphblas/utils/telemetry/Telemetry.hpp | 25 +- .../utils/telemetry/TelemetryBase.hpp | 65 +++-- .../utils/telemetry/TelemetryController.hpp | 143 +++++----- .../graphblas/utils/telemetry/Timeable.hpp | 40 ++- tests/smoke/hpcg.cpp | 190 +++++++------- 29 files changed, 1293 insertions(+), 955 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/average_coarsener.hpp b/include/graphblas/algorithms/hpcg/average_coarsener.hpp index 6af5e5ff7..eb3853c61 100644 --- a/include/graphblas/algorithms/hpcg/average_coarsener.hpp +++ b/include/graphblas/algorithms/hpcg/average_coarsener.hpp @@ -24,12 +24,12 @@ #ifndef _H_GRB_ALGORITHMS_AVERAGE_COARSENER #define _H_GRB_ALGORITHMS_AVERAGE_COARSENER -#include #include -#include -#include #include +#include +#include #include +#include #include #include @@ -87,16 +87,21 @@ namespace grb { ) noexcept : _i( i ), _j( j ), - _value( value ) - {} + _value( value ) {} _ValueGenerator( const _ValueGenerator & ) = default; _ValueGenerator & operator=( const _ValueGenerator & ) = default; - inline RowIndexType i() const { return _i; } - inline ColumnIndexType j() const { return _j; } - inline ValueType v() const { return _value; } + inline RowIndexType i() const { + return _i; + } + inline ColumnIndexType j() const { + return _j; + } + inline ValueType v() const { + return _value; + } private: RowIndexType _i; @@ -108,12 
+113,12 @@ namespace grb { using iterator_category = std::random_access_iterator_tag; using value_type = _ValueGenerator; using pointer = const value_type; - using reference = const value_type&; + using reference = const value_type &; using difference_type = typename LinearSystemIterType::difference_type; - AverageGeneratorIterator( const SelfType &o ) = default; + AverageGeneratorIterator( const SelfType & o ) = default; - AverageGeneratorIterator( SelfType &&o ) = default; + AverageGeneratorIterator( SelfType && o ) = default; SelfType & operator=( const SelfType & ) = default; @@ -123,11 +128,11 @@ namespace grb { * Advances \c this by 1 in constant time. */ SelfType & operator++() noexcept { - (void) ++_subspace_iter; + (void)++_subspace_iter; size_t subspace_position = _subspace_iter->get_linear_position(); // std::cout << "subspace_position " << subspace_position << std::endl; if( subspace_position == _num_neighbors ) { - (void) ++_sys_iter; + (void)++_sys_iter; _subspace_iter = _finer_subspace->begin(); } update_coords(); @@ -150,21 +155,21 @@ namespace grb { /** * Computes the difference between \c this and \p o as integer. */ - difference_type operator-( const SelfType &o ) const { + difference_type operator-( const SelfType & o ) const { return this->_sys_iter - o._sys_iter; } /** * Returns whether \c this and \p o differ. */ - bool operator!=( const SelfType &o ) const { + bool operator!=( const SelfType & o ) const { return this->_sys_iter != o._sys_iter; } /** * Returns whether \c this and \p o are equal. */ - bool operator==( const SelfType &o ) const { + bool operator==( const SelfType & o ) const { return ! 
this->operator!=( o ); } @@ -198,9 +203,9 @@ namespace grb { } private: - const LinearSystemType *_lin_sys; - const LinearSystemType *_finer_subspace; - const ArrayType *_steps; + const LinearSystemType * _lin_sys; + const LinearSystemType * _finer_subspace; + const ArrayType * _steps; CoordType _num_neighbors; LinearSystemIterType _sys_iter; LinearSystemIterType _subspace_iter; @@ -217,9 +222,9 @@ namespace grb { * @param steps ratios per dimension between finer and coarser system */ AverageGeneratorIterator( - const LinearSystemType &system, - const LinearSystemType &finer_subspace, - const ArrayType &steps + const LinearSystemType & system, + const LinearSystemType & finer_subspace, + const ArrayType & steps ) noexcept : _lin_sys( &system ), _finer_subspace( &finer_subspace ), @@ -246,7 +251,7 @@ namespace grb { ColumnIndexType s = 1; for( size_t i = 0; i < DIMS; i++ ) { finer += s * _subspace_iter->get_position()[ i ]; - s *= (*_steps)[ i ]; + s *= ( *_steps )[ i ]; finer += s * _sys_iter->get_position()[ i ]; s *= _lin_sys->get_sizes()[ i ]; } @@ -280,8 +285,8 @@ namespace grb { * otherwise an exception is raised. 
*/ AverageCoarsenerBuilder( - const ArrayType &_finer_sizes, - const ArrayType &_coarser_sizes + const ArrayType & _finer_sizes, + const ArrayType & _coarser_sizes ) : system( _coarser_sizes.begin(), _coarser_sizes.end() ), _finer_subspace( _coarser_sizes.cbegin(), _coarser_sizes.cend() ), @@ -291,10 +296,8 @@ namespace grb { // finer size MUST be an exact multiple of coarser_size std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); if( ratio.quot < 2 || ratio.rem != 0 ) { - throw std::invalid_argument( - std::string( "finer size of dimension " ) + std::to_string( i ) + - std::string( "is not an exact multiple of coarser size" ) - ); + throw std::invalid_argument( std::string( "finer size of dimension " ) + + std::to_string( i ) + std::string( "is not an exact multiple of coarser size" ) ); } steps[ i ] = ratio.quot; } @@ -338,10 +341,9 @@ namespace grb { grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > _finer_subspace; - - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps;///< array of steps, i.e. 
how much each column coordinate (finer system) must be + //// incremented when incrementing the row coordinates; is is the ration between + //// #finer_sizes and row_generator#physical_sizes }; } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp index 5519a6504..bb4759d6f 100644 --- a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp +++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp @@ -24,8 +24,8 @@ #ifndef _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING #define _H_GRB_ALGORITHMS_HPCG_GREEDY_COLORING -#include #include +#include #include @@ -75,16 +75,15 @@ namespace grb { typename CoordType, bool lowest_color_first = true > void hpcg_greedy_color_ndim_system( - const grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType > &system, - std::vector< CoordType > &row_colors, - std::vector< CoordType > &color_counters, + const grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType > & system, + std::vector< CoordType > & row_colors, + std::vector< CoordType > & color_counters, bool reorder_rows_per_color = false ) { - CoordType nrows = system.system_size(); row_colors.insert( row_colors.begin(), nrows, nrows ); // value `nrows' means `uninitialized'; initialized colors go from 0 to nrow-1 CoordType totalColors = 1; - row_colors[0] = 0; // first point gets color 0 + row_colors[ 0 ] = 0; // first point gets color 0 // Finds colors in a greedy (a likely non-optimal) fashion. typename grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >::Iterator begin = system.begin(); @@ -106,9 +105,9 @@ namespace grb { if( curCol < curRow ) { assert( row_colors[ curCol ] < nrows ); // if curCol < curRow, curCol has already a color assigned std::vector< bool >::reference color_is_assigned = assigned[ row_colors[ curCol ] ]; - if( !color_is_assigned ) { + if( ! 
color_is_assigned ) { // count how many colors are already assigned - (void) currentlyAssigned++; + (void)currentlyAssigned++; } // track which colors are assigned color_is_assigned = true; @@ -122,7 +121,7 @@ namespace grb { if( lowest_color_first ) { // here, assign colors greedily starting from the lowest available one for( CoordType j = 0; j < totalColors; ++j ) { - if( !assigned[ j ] ) { + if( ! assigned[ j ] ) { // if no neighbor with this color, use it for this row row_colors[ curRow ] = j; break; @@ -132,7 +131,7 @@ namespace grb { // here, assign colors greedily starting from the highest available one for( CoordType j = totalColors; j > 0; --j ) { CoordType color = j - 1; - if( !assigned[ color ] ) { + if( ! assigned[ color ] ) { // if no neighbor with this color, use it for this row row_colors[ curRow ] = color; break; @@ -143,7 +142,7 @@ namespace grb { assert( row_colors[ curRow ] == nrows ); if( row_colors[ curRow ] == nrows ) { row_colors[ curRow ] = totalColors; - (void) totalColors++; + (void)totalColors++; } else { assert( 0 ); // should never get here } @@ -153,7 +152,7 @@ namespace grb { #ifdef _DEBUG std::cout << "assigned colors: " << totalColors << " [ -> ]\n"; - for( size_t i = 0; i < row_colors.size(); i++ ){ + for( size_t i = 0; i < row_colors.size(); i++ ) { std::cout << i << " -> " << row_colors[ i ] << ", "; } std::cout << std::endl; @@ -162,21 +161,21 @@ namespace grb { // count number of vertices per color color_counters.insert( color_counters.begin(), totalColors, 0 ); for( CoordType i = 0; i < nrows; ++i ) { - (void) color_counters[ row_colors[ i ] ]++; + (void)color_counters[ row_colors[ i ] ]++; } - if( !reorder_rows_per_color ) { + if( ! 
reorder_rows_per_color ) { return; } // form in-place prefix scan CoordType old = 0, old0; for( CoordType i = 1; i < totalColors; ++i ) { - old0 = color_counters[i]; - color_counters[i] = color_counters[i-1] + old; + old0 = color_counters[ i ]; + color_counters[ i ] = color_counters[ i - 1 ] + old; old = old0; } - color_counters[0] = 0; + color_counters[ 0 ] = 0; // translate `colors' into a permutation for( CoordType i = 0; i < nrows; ++i ) { diff --git a/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp index a3826c9c0..92ef47263 100644 --- a/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp +++ b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp @@ -24,11 +24,11 @@ #ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER #define _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER -#include #include +#include +#include #include #include -#include #include #include @@ -70,8 +70,7 @@ namespace grb { using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; - using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; + using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; using LinearSystemIterType = typename LinearSystemType::Iterator; using SelfType = SinglePointCoarsenerIterator< DIMS, CoordType, ValueType >; using ArrayType = std::array< CoordType, DIMS >; @@ -85,15 +84,18 @@ namespace grb { ColumnIndexType j ) noexcept : _i( i ), - _j( j ) - {} + _j( j ) {} _HPCGValueGenerator( const _HPCGValueGenerator & ) = default; _HPCGValueGenerator & operator=( const _HPCGValueGenerator & ) = default; - inline RowIndexType i() const { return _i; } - inline ColumnIndexType j() const { return _j; } + inline RowIndexType i() const { + return _i; + } + inline ColumnIndexType j() 
const { + return _j; + } inline ValueType v() const { return static_cast< ValueType >( 1 ); } @@ -107,12 +109,12 @@ namespace grb { using iterator_category = std::random_access_iterator_tag; using value_type = _HPCGValueGenerator; using pointer = const value_type; - using reference = const value_type&; + using reference = const value_type &; using difference_type = typename LinearSystemIterType::difference_type; - SinglePointCoarsenerIterator( const SelfType &o ) = default; + SinglePointCoarsenerIterator( const SelfType & o ) = default; - SinglePointCoarsenerIterator( SelfType &&o ) = default; + SinglePointCoarsenerIterator( SelfType && o ) = default; SelfType & operator=( const SelfType & ) = default; @@ -122,7 +124,7 @@ namespace grb { * Advances \c this by 1 in constant time. */ SelfType & operator++() noexcept { - (void) ++_sys_iter; + (void)++_sys_iter; update_coords(); return *this; } @@ -139,21 +141,21 @@ namespace grb { /** * Computes the difference between \c this and \p o as integer. */ - difference_type operator-( const SelfType &o ) const { + difference_type operator-( const SelfType & o ) const { return this->_sys_iter - o._sys_iter; } /** * Returns whether \c this and \p o differ. */ - bool operator!=( const SelfType &o ) const { + bool operator!=( const SelfType & o ) const { return this->_sys_iter != o._sys_iter; } /** * Returns whether \c this and \p o are equal. */ - bool operator==( const SelfType &o ) const { + bool operator==( const SelfType & o ) const { return ! 
this->operator!=( o ); } @@ -187,8 +189,8 @@ namespace grb { } private: - const LinearSystemType *_lin_sys; - const ArrayType *_steps; + const LinearSystemType * _lin_sys; + const ArrayType * _steps; LinearSystemIterType _sys_iter; value_type _val; @@ -201,8 +203,8 @@ namespace grb { * @param steps ratios per dimension between finer and coarser system */ SinglePointCoarsenerIterator( - const LinearSystemType &system, - const ArrayType &steps + const LinearSystemType & system, + const ArrayType & steps ) noexcept : _lin_sys( &system ), _steps( &steps ), @@ -225,7 +227,7 @@ namespace grb { ColumnIndexType finer = 0; ColumnIndexType s = 1; for( size_t i = 0; i < DIMS; i++ ) { - s *= (*_steps)[ i ]; + s *= ( *_steps )[ i ]; finer += s * _sys_iter->get_position()[ i ]; s *= _lin_sys->get_sizes()[ i ]; } @@ -259,17 +261,17 @@ namespace grb { * otherwise an exception is raised. */ SinglePointCoarsenerBuilder( - const ArrayType &_finer_sizes, - const ArrayType &_coarser_sizes - ) : system( _coarser_sizes.begin(), _coarser_sizes.end() ) { + const ArrayType & _finer_sizes, + const ArrayType & _coarser_sizes + ) : + system( _coarser_sizes.begin(), + _coarser_sizes.end() ) + { for( size_t i = 0; i < DIMS; i++ ) { // finer size MUST be an exact multiple of coarser_size std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); if( ratio.quot < 2 || ratio.rem != 0 ) { - throw std::invalid_argument( - std::string( "finer size of dimension " ) + std::to_string( i ) + - std::string( "is not an exact multiple of coarser size" ) - ); + throw std::invalid_argument( std::string( "finer size of dimension " ) + std::to_string( i ) + std::string( "is not an exact multiple of coarser size" ) ); } steps[ i ] = ratio.quot; } @@ -311,11 +313,10 @@ namespace grb { grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; ArrayType steps; ///< array of steps, i.e. 
how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes + //// incremented when incrementing the row coordinates; is is the ration between + //// #finer_sizes and row_generator#physical_sizes }; } // namespace algorithms } // namespace grb #endif // _H_GRB_ALGORITHMS_HPCG_SINGLE_POINT_COARSENER - diff --git a/include/graphblas/algorithms/hpcg/system_builder.hpp b/include/graphblas/algorithms/hpcg/system_builder.hpp index 94d1565f2..84600414c 100644 --- a/include/graphblas/algorithms/hpcg/system_builder.hpp +++ b/include/graphblas/algorithms/hpcg/system_builder.hpp @@ -34,14 +34,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include -#include #include @@ -73,8 +72,11 @@ namespace grb { HPCGDiagGenerator & operator=( const HPCGDiagGenerator & ) = default; - inline ValueType operator()( const CoordType &i, const CoordType &j ) const noexcept { - return j == i ? _diag: _non_diag; + inline ValueType operator()( + const CoordType & i, + const CoordType & j + ) const noexcept { + return j == i ? _diag : _non_diag; } ValueType _diag; @@ -82,8 +84,8 @@ namespace grb { }; using HaloSystemType = grb::utils::multigrid::LinearizedHaloNDimSystem< DIMS, CoordType >; - using Iterator = grb::utils::multigrid::HaloMatrixGeneratorIterator< DIMS, CoordType, - ValueType, HPCGDiagGenerator >; + using Iterator = grb::utils::multigrid::HaloMatrixGeneratorIterator< DIMS, + CoordType, ValueType, HPCGDiagGenerator >; /** * Construct a new HPCGSystemBuilder object from the data of the physical system. 
@@ -94,7 +96,7 @@ namespace grb { * @param non_diag value outside the diagonal, for element-element interaction */ HPCGSystemBuilder( - const std::array< CoordType, DIMS > &sizes, + const std::array< CoordType, DIMS > & sizes, CoordType halo, ValueType diag, ValueType non_diag @@ -116,9 +118,11 @@ namespace grb { HPCGSystemBuilder( HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; - HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( const HPCGSystemBuilder< DIMS, CoordType, ValueType > & ) = default; + HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( + const HPCGSystemBuilder< DIMS, CoordType, ValueType > & ) = default; - HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; + HPCGSystemBuilder< DIMS, CoordType, ValueType > & operator=( + HPCGSystemBuilder< DIMS, CoordType, ValueType > && ) = default; /** * Number of elements of the mesh. @@ -175,4 +179,3 @@ namespace grb { } // namespace grb #endif // _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDER - diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index b86564def..9f3fdf583 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -24,23 +24,25 @@ #ifndef _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS #define _H_GRB_ALGORITHMS_HPCG_SYSTEM_BUILDING_UTILS +#include #include #include +#include #include -#include -#include #include +#include #include -#include #include #include +#include +#include #include -#include "system_builder.hpp" -#include "single_point_coarsener.hpp" #include "average_coarsener.hpp" #include "greedy_coloring.hpp" +#include "single_point_coarsener.hpp" +#include "system_builder.hpp" namespace grb { namespace algorithms { @@ -82,8 +84,8 @@ namespace grb { typename CoordType, typename NonzeroType > void hpcg_build_multigrid_generators( 
- const HPCGSystemParams< DIMS, NonzeroType > &params, - std::vector< grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > > &mg_generators + const HPCGSystemParams< DIMS, NonzeroType > & params, + std::vector< grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > > & mg_generators ) { static_assert( DIMS > 0, "DIMS must be > 0" ); @@ -104,28 +106,23 @@ namespace grb { coord_sizes.begin() ); // generate hierarchical coarseners - for( size_t coarsening_level = 0UL; - min_physical_size >= params.min_phys_size && coarsening_level <= params.max_levels; - coarsening_level++ ) { + for( size_t coarsening_level = 0UL; min_physical_size >= params.min_phys_size + && coarsening_level <= params.max_levels; coarsening_level++ ) { // build generator - mg_generators.emplace_back( coord_sizes, params.halo_size, - params.diag_value, params.non_diag_value ); + mg_generators.emplace_back( coord_sizes, params.halo_size, params.diag_value, + params.non_diag_value ); // prepare for new iteration min_physical_size /= params.coarsening_step; - std::for_each( coord_sizes.begin(), coord_sizes.end(), - [ &params ]( CoordType &v ) { - std::ldiv_t ratio = std::ldiv( v, params.coarsening_step ); - if( ratio.rem != 0 ) { - throw std::invalid_argument( - std::string( "system size " ) + std::to_string( v ) + - std::string( " is not divisible by " ) + - std::to_string( params.coarsening_step ) - ); - } - v = ratio.quot; - }); + std::for_each( coord_sizes.begin(), coord_sizes.end(), [ &params ]( CoordType & v ) { + std::ldiv_t ratio = std::ldiv( v, params.coarsening_step ); + if( ratio.rem != 0 ) { + throw std::invalid_argument( std::string( "system size " ) + std::to_string( v ) + + std::string( " is not divisible by " ) + std::to_string( params.coarsening_step ) ); + } + v = ratio.quot; + } ); } } @@ -138,23 +135,20 @@ namespace grb { * This function takes care of the parallelism by employing random-access iterators and by * \b parallelizing the generation across multiple processes 
in case of distributed execution. */ - template < + template< size_t DIMS, typename CoordType, typename NonzeroType, typename Logger > grb::RC hpcg_populate_system_matrix( - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - grb::Matrix< NonzeroType > &M, - Logger & logger + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & system_generator, + grb::Matrix< NonzeroType > & M, Logger & logger ) { - logger << "- generating system matrix..."; - typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator begin( - system_generator.make_begin_iterator() ); - typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator end( - system_generator.make_end_iterator() - ); + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator + begin( system_generator.make_begin_iterator() ); + typename grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType >::Iterator + end( system_generator.make_end_iterator() ); grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), system_generator.num_neighbors(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); @@ -184,19 +178,19 @@ namespace grb { typename IOType, typename NonzeroType > grb::RC hpcg_populate_coarsener_any_builder( - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, - CoarseningData< IOType, NonzeroType > &coarsener + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener ) { static_assert( DIMS > 0, "DIMS must be > 0" ); - const std::array< CoordType, DIMS > &finer_sizes = 
finer_system_generator.get_generator().get_sizes(); - const std::array< CoordType, DIMS > &coarser_sizes = coarser_system_generator.get_generator().get_sizes(); + const std::array< CoordType, DIMS > & finer_sizes = finer_system_generator.get_generator().get_sizes(); + const std::array< CoordType, DIMS > & coarser_sizes = coarser_system_generator.get_generator().get_sizes(); const size_t finer_size = finer_system_generator.system_size(); const size_t coarser_size = coarser_system_generator.system_size(); if( coarser_size >= finer_size ) { - throw std::invalid_argument( "wrong sizes"); + throw std::invalid_argument( "wrong sizes" ); } size_t const rows = coarser_size; @@ -204,17 +198,15 @@ namespace grb { assert( finer_sizes.size() == coarser_sizes.size() ); - grb::Matrix< NonzeroType > &M = coarsener.coarsening_matrix; + grb::Matrix< NonzeroType > & M = coarsener.coarsening_matrix; if( grb::nrows( M ) != rows || grb::ncols( M ) != cols ) { throw std::invalid_argument( "wrong matrix dimensions: matrix should be rectangular" - " with rows == and cols == " ); + " with rows == and cols == " ); } IterBuilderType coarsener_builder( finer_sizes, coarser_sizes ); - typename IterBuilderType::Iterator begin( coarsener_builder.make_begin_iterator() ), - end( coarsener_builder.make_end_iterator() ); - grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), - coarsener_builder.system_size(), begin, end ); + typename IterBuilderType::Iterator begin( coarsener_builder.make_begin_iterator() ), end( coarsener_builder.make_end_iterator() ); + grb::utils::partition_iteration_range_on_procs( spmd<>::nprocs(), spmd<>::pid(), coarsener_builder.system_size(), begin, end ); return buildMatrixUnique( M, begin, end, grb::IOMode::PARALLEL ); } @@ -227,13 +219,13 @@ namespace grb { typename IOType, typename NonzeroType > grb::RC hpcg_populate_coarsener( - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, - const 
grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, - CoarseningData< IOType, NonzeroType > &coarsener + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener ) { return hpcg_populate_coarsener_any_builder< - grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType > > - ( finer_system_generator, coarser_system_generator, coarsener ); + grb::algorithms::SinglePointCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( + finer_system_generator, coarser_system_generator, coarsener ); } /** @@ -245,13 +237,13 @@ namespace grb { typename IOType, typename NonzeroType > grb::RC hpcg_populate_coarsener_avg( - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &finer_system_generator, - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &coarser_system_generator, - CoarseningData< IOType, NonzeroType > &coarsener + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & finer_system_generator, + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & coarser_system_generator, + CoarseningData< IOType, NonzeroType > & coarsener ) { return hpcg_populate_coarsener_any_builder< - grb::algorithms::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > > - ( finer_system_generator, coarser_system_generator, coarsener ); + grb::algorithms::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( + finer_system_generator, coarser_system_generator, coarsener ); } namespace internal { @@ -264,10 +256,10 @@ namespace grb { * @param[out] per_color_rows for each position \a i it stores an std::vector with all rows * of color \a i inside \p row_colors */ - template< typename CoordType > void hpcg_split_rows_by_color( + template< typename CoordType > + 
void hpcg_split_rows_by_color( const std::vector< CoordType > & row_colors, - size_t num_colors, - std::vector< std::vector< CoordType > > & per_color_rows + size_t num_colors, std::vector< std::vector< CoordType > > & per_color_rows ) { per_color_rows.resize( num_colors ); for( CoordType i = 0; i < row_colors.size(); i++ ) { @@ -284,7 +276,8 @@ namespace grb { * * @tparam CoordType type of the internal coordinate */ - template< typename CoordType > struct true_iter { + template< typename CoordType > + struct true_iter { // static const bool __TRUE; @@ -292,12 +285,12 @@ namespace grb { using iterator_category = std::random_access_iterator_tag; using value_type = bool; using pointer = const bool *; - using reference = const bool&; + using reference = const bool &; using difference_type = long; true_iter() = delete; - true_iter( CoordType first ): index( first ) {} + true_iter( CoordType first ) : index( first ) {} true_iter( const self_t & ) = default; @@ -308,7 +301,7 @@ namespace grb { } self_t & operator++() noexcept { - (void) index++; + (void)index++; return *this; } @@ -326,7 +319,7 @@ namespace grb { } reference operator*() const { - return *(this->operator->()); + return *( this->operator->() ); } private: @@ -353,8 +346,8 @@ namespace grb { */ grb::RC hpcg_build_static_color_masks( size_t matrix_size, - const std::vector< std::vector< size_t > > &per_color_rows, - std::vector< grb::Vector< bool> > &masks + const std::vector< std::vector< size_t > > & per_color_rows, + std::vector< grb::Vector< bool > > & masks ) { if( ! 
masks.empty() ) { throw std::invalid_argument( "vector of masks is expected to be empty" ); @@ -375,11 +368,9 @@ namespace grb { std::vector< size_t >::const_iterator begin = rows.cbegin(); std::vector< size_t >::const_iterator end = rows.cend(); // partition_iteration_range( rows.size(), begin, end ); - grb::RC rc = grb::buildVectorUnique( output_mask, begin , end, true_iter< size_t >( 0 ), - true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); + grb::RC rc = grb::buildVectorUnique( output_mask, begin, end, true_iter< size_t >( 0 ), true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); if( rc != SUCCESS ) { - std::cerr << "error while creating output mask for color " << i << ": " - << toString( rc ) << std::endl; + std::cerr << "error while creating output mask for color " << i << ": " << toString( rc ) << std::endl; return rc; } #ifdef _DEBUG @@ -389,7 +380,8 @@ namespace grb { for( const auto & v : output_mask ) { std::cout << v.first << " "; count++; - if( count > 20 ) break; + if( count > 20 ) + break; } std::cout << std::endl; } @@ -423,9 +415,8 @@ namespace grb { typename NonzeroType, typename Logger > grb::RC hpcg_populate_smoothing_data( - const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > &system_generator, - SmootherData< NonzeroType > &smoothing_info, - Logger & logger + const grb::algorithms::HPCGSystemBuilder< DIMS, CoordType, NonzeroType > & system_generator, + SmootherData< NonzeroType > & smoothing_info, Logger & logger ) { grb::RC rc = set( smoothing_info.A_diagonal, system_generator.get_diag_value() ); if( rc != grb::SUCCESS ) { @@ -444,10 +435,9 @@ namespace grb { logger << "error: " << __LINE__ << std::endl; return rc; } - logger <<"- found " << color_counters.size() << " colors," - << " generating color masks..."; - return internal::hpcg_build_static_color_masks( system_generator.system_size(), - per_color_rows, smoothing_info.color_masks ); + logger << "- found " << color_counters.size() << " colors," + << " 
generating color masks..."; + return internal::hpcg_build_static_color_masks( system_generator.system_size(), per_color_rows, smoothing_info.color_masks ); } } // namespace algorithms diff --git a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp index f46b8e558..9c95b50cc 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_building_utils.hpp @@ -24,9 +24,9 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS #define _H_GRB_ALGORITHMS_MULTIGRID_BUILDING_UTILS -#include -#include #include +#include +#include namespace grb { namespace algorithms { @@ -81,7 +81,7 @@ namespace grb { throw std::invalid_argument( "at least one size should be available" ); } size_t finer_size = mg_sizes[ 0 ]; - system_levels.emplace_back( new MGInfoType( tt, 0, finer_size ) ); // create main system + system_levels.emplace_back( new MGInfoType( tt, 0, finer_size ) ); // create main system smoother_levels.emplace_back( new SmootherInfoType( finer_size ) ); // create smoother for main for( size_t i = 1; i < mg_sizes.size(); i++ ) { size_t coarser_size = mg_sizes[ i ]; diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index 3099e7d4e..f465ba8da 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -36,12 +36,11 @@ #include #include -#include #include +#include #include "multigrid_data.hpp" - namespace grb { namespace algorithms { @@ -87,8 +86,8 @@ namespace grb { /** * Structure for the output information of a CG run. 
*/ - template < typename ResidualType > struct CGOutInfo { - size_t iterations; ///< number of iterations performed + template< typename ResidualType > struct CGOutInfo { + size_t iterations; ///< number of iterations performed ResidualType norm_residual; ///< norm of the final residual }; @@ -134,15 +133,15 @@ namespace grb { static_assert( std::is_move_constructible< MultiGridRunnerType >::value, "cannot construct the Multi-Grid runner by move" ); - Ring ring; ///< algebraic ring to be used - Minus minus; ///< minus operator to be used - bool with_preconditioning = true; ///< whether preconditioning is enabled - size_t max_iterations = 10; ///< max number of allowed iterations for CG: after that, the solver is halted - ///< and the result achieved so far returned - ResidualType tolerance = ring. template getZero< ResidualType >(); ///< ratio between initial residual and current residual that halts the solver - ///< if reached, for the solution is to be considered "good enough" + Ring ring; ///< algebraic ring to be used + Minus minus; ///< minus operator to be used + bool with_preconditioning = true; ///< whether preconditioning is enabled + size_t max_iterations = 10; ///< max number of allowed iterations for CG: after that, the solver is halted + ///< and the result achieved so far returned + ResidualType tolerance = ring.template getZero< ResidualType >(); ///< ratio between initial residual and current residual that halts the solver + ///< if reached, for the solution is to be considered "good enough" - MultiGridRunnerType &mg_runner; + MultiGridRunnerType & mg_runner; DbgOutputStreamType dbg_logger; /** @@ -169,8 +168,7 @@ namespace grb { ) : grb::utils::telemetry::Timeable< TelControllerType >( tt ), mg_runner( _mg_runner ), - dbg_logger( _dbg_logger ) - {} + dbg_logger( _dbg_logger ) {} /** * Functional operator to invoke a full CG-MG computation. 
@@ -181,9 +179,9 @@ namespace grb { * @return grb::RC indicating the success or the error occurred */ inline grb::RC operator()( - typename MultiGridRunnerType::MultiGridInputType &grid_base, - MultiGridCGData< IOType, NonzeroType, InputType > &cg_data, - CGOutInfo< ResidualType > &out_info + typename MultiGridRunnerType::MultiGridInputType & grid_base, + MultiGridCGData< IOType, NonzeroType, InputType > & cg_data, + CGOutInfo< ResidualType > & out_info ) { this->start(); grb::RC ret = multigrid_conjugate_gradient( cg_data, grid_base, out_info ); @@ -209,17 +207,17 @@ namespace grb { * @return grb::RC SUCCESS in case of succesful run */ grb::RC multigrid_conjugate_gradient( - HPCGInputType &cg_data, - typename MultiGridRunnerType::MultiGridInputType &grid_base, - CGOutInfo< ResidualType > &out_info + HPCGInputType & cg_data, + typename MultiGridRunnerType::MultiGridInputType & grid_base, + CGOutInfo< ResidualType > & out_info ) { - const grb::Matrix< NonzeroType > &A = grid_base.A; // system matrix - grb::Vector< IOType > &r = grid_base.r; // residual vector - grb::Vector< IOType > &z = grid_base.z; // pre-conditioned residual vector - grb::Vector< IOType > &x = cg_data.x; // initial (and final) solution - const grb::Vector< InputType > &b = cg_data.b; // right-side value - grb::Vector< IOType > &p = cg_data.p; // direction vector - grb::Vector< IOType > &Ap = cg_data.u; // temp vector + const grb::Matrix< NonzeroType > & A = grid_base.A; // system matrix + grb::Vector< IOType > & r = grid_base.r; // residual vector + grb::Vector< IOType > & z = grid_base.z; // pre-conditioned residual vector + grb::Vector< IOType > & x = cg_data.x; // initial (and final) solution + const grb::Vector< InputType > & b = cg_data.b; // right-side value + grb::Vector< IOType > & p = cg_data.p; // direction vector + grb::Vector< IOType > & Ap = cg_data.u; // temp vector grb::RC ret = SUCCESS; const IOType io_zero = ring.template getZero< IOType >(); @@ -329,12 +327,10 @@ namespace grb { 
++iter; out_info.iterations = iter; out_info.norm_residual = norm_residual; - } while( iter < max_iterations && - norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); + } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); return ret; } - }; } // namespace algorithms diff --git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp index 67fe7bb8f..4f0d0eed4 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_data.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -24,13 +24,12 @@ #ifndef _H_GRB_ALGORITHMS_HPCG_DATA #define _H_GRB_ALGORITHMS_HPCG_DATA -#include #include +#include #include #include - namespace grb { namespace algorithms { @@ -61,8 +60,8 @@ namespace grb { const size_t level; ///< level of the grid (0 for the finest physical system) const size_t system_size; ///< size of the system, i.e. side of the #A system matrix grb::Matrix< NonzeroType > A; ///< system matrix - grb::Vector< IOType > z; ///< multi-grid solution - grb::Vector< IOType > r; ///< residual + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual /** * Construct a new multigrid data object from level information and system size. 
@@ -98,4 +97,3 @@ namespace grb { } // namespace grb #endif // _H_GRB_ALGORITHMS_HPCG_DATA - diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index dbe15d2b8..31b623024 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -26,10 +26,10 @@ #define _H_GRB_ALGORITHMS_MULTIGRID_V_CYCLE #include -#include -#include #include +#include #include +#include #include #include @@ -37,6 +37,7 @@ #include "multigrid_data.hpp" + namespace grb { namespace algorithms { @@ -57,8 +58,7 @@ namespace grb { * @tparam Minus the minus operator for subtractions * @tparam descr descriptors with statically-known data for computation and containers */ - template< - typename MGTypes, + template< typename MGTypes, typename MGSmootherType, typename CoarsenerType, typename TelControllerType, @@ -83,63 +83,60 @@ namespace grb { "cannot construct the Minus operator with default values" ); // check the interface between HPCG and MG match - static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, - MultiGridInputType >::value, "input type of the Smoother kernel must match the input from Multi-Grid" ); + static_assert( std::is_base_of< typename MGSmootherType::SmootherInputType, MultiGridInputType >::value, + "input type of the Smoother kernel must match the input from Multi-Grid" ); MGSmootherType & smoother_runner; ///< object to run the smoother CoarsenerType & coarsener_runner; ///< object to run the coarsener DbgOutputStreamType dbg_logger; std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) - Ring ring; ///< algebraic ring - Minus minus; ///< minus operator + Ring ring; ///< algebraic ring + Minus minus; ///< minus operator // operator to extract the reference out of an std::unique_ptr object struct __extractor { - MultiGridInputType* operator()( 
- typename std::vector< std::unique_ptr< MultiGridInputType > >::reference &ref - ) { + MultiGridInputType * operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::reference & ref ) { return ref.get(); } - const MultiGridInputType* operator()( - typename std::vector< std::unique_ptr< MultiGridInputType > >::const_reference &ref - ) const { + const MultiGridInputType * operator()( + typename std::vector< std::unique_ptr< MultiGridInputType > >::const_reference & ref ) const { return ref.get(); } }; using __unique_ptr_extractor = grb::utils::IteratorValueAdaptor< - typename std::vector< std::unique_ptr< MultiGridInputType > >::iterator, - __extractor - >; + typename std::vector< std::unique_ptr< MultiGridInputType > >::iterator, __extractor >; /** * Construct a new MultiGridRunner object by moving in the state of the pre-built * smoother and coarsener. */ MultiGridRunner( - MGSmootherType &_smoother_runner, - CoarsenerType &_coarsener_runner - ) : smoother_runner( _smoother_runner ), - coarsener_runner( _coarsener_runner ) + MGSmootherType & _smoother_runner, + CoarsenerType & _coarsener_runner + ) : + smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ) { static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); } MultiGridRunner( - MGSmootherType &_smoother_runner, - CoarsenerType &_coarsener_runner, + MGSmootherType & _smoother_runner, + CoarsenerType & _coarsener_runner, DbgOutputStreamType & _dbg_logger - ) : smoother_runner( _smoother_runner ), - coarsener_runner( _coarsener_runner ), - dbg_logger( _dbg_logger ) - {} + ) : + smoother_runner( _smoother_runner ), + coarsener_runner( _coarsener_runner ), + dbg_logger( _dbg_logger ) {} /** * Operator to invoke a full multi-grid run starting from the given level. 
*/ - inline grb::RC operator()( MultiGridInputType &system ) { + inline grb::RC operator()( MultiGridInputType & system ) { return this->operator()( __unique_ptr_extractor( system_levels.begin() += system.level ), __unique_ptr_extractor( system_levels.end() ) ); } @@ -172,17 +169,6 @@ namespace grb { * Failuers of GraphBLAS operations are handled by immediately stopping the execution * and returning the failure code. * - * @tparam descr descriptor for static information - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam MGSysIterType type of the iterator across grid levels - * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various - * smoothing steps - * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening - * and prolongation - * @tparam Ring the ring of algebraic operators zero-values - * @tparam Minus the minus operator for subtractions - * * @param mgiter_begin iterator pointing to the current level of the multi-grid * @param mgiter_end end iterator, indicating the end of the recursion * @param smoother callable object to invoke the smoothing steps @@ -198,16 +184,16 @@ namespace grb { ) { RC ret = SUCCESS; assert( mgiter_begin != mgiter_end ); - MultiGridInputType &finer_system = *mgiter_begin; + MultiGridInputType & finer_system = *mgiter_begin; ++mgiter_begin; dbg_logger << "mg BEGINNING {" << std::endl; // clean destination vector - ret = ret ? ret : grb::set< descr >( finer_system.z, ring. template getZero< IOType >() ); + ret = ret ? ret : grb::set< descr >( finer_system.z, ring.template getZero< IOType >() ); dbg_logger << ">>> initial r: " << finer_system.r << std::endl; - if( !( mgiter_begin != mgiter_end ) ) { + if( ! ( mgiter_begin != mgiter_end ) ) { // compute one round of Gauss Seidel and return ret = ret ? 
ret : smoother_runner.nonrecursive_smooth( finer_system ); assert( ret == SUCCESS ); @@ -215,7 +201,7 @@ namespace grb { dbg_logger << "} mg END" << std::endl; return ret; } - MultiGridInputType &coarser_system = *mgiter_begin; + MultiGridInputType & coarser_system = *mgiter_begin; // pre-smoother ret = ret ? ret : smoother_runner.pre_smooth( finer_system ); @@ -241,7 +227,6 @@ namespace grb { return ret; } - }; } // namespace algorithms diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index 305fa30d7..aa7157de7 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -38,8 +38,8 @@ namespace grb { */ template< typename IOType > struct SmootherData { - grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A - grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results + grb::Vector< IOType > A_diagonal; ///< vector with the diagonal of #A + grb::Vector< IOType > smoother_temp; ///< for smoother's intermediate results std::vector< grb::Vector< bool > > color_masks; ///< for color masks /** @@ -59,7 +59,6 @@ namespace grb { } }; - /** * Runner object for the RBGS smoother, with multiple methods for each type of smoothing step: * pre-, post- and non-recursive, as invoked during a full run of a multi-grid V-cycle. 
@@ -70,7 +69,7 @@ namespace grb { * @tparam TelControllerType telemetry controller to (de)activate time tracing within passed MultiGridData objects * @tparam descr descriptors with statically-known data for computation and containers */ - template < + template< class SmootherTypes, typename TelControllerType, Descriptor descr = descriptors::no_operation @@ -83,27 +82,25 @@ namespace grb { using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external input structure using SmootherDataType = SmootherData< IOType >; ///< smoothing information and temporary variables (per MG level) - size_t presmoother_steps = 1UL; ///< number of pre-smoother steps - size_t postsmoother_steps = 1UL; ///< number of post-smoother steps - size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level - std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, - ///< the smoothing data (finest first) - Ring ring; ///< the algebraic ring + size_t presmoother_steps = 1UL; ///< number of pre-smoother steps + size_t postsmoother_steps = 1UL; ///< number of post-smoother steps + size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level + std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, + ///< the smoothing data (finest first) + Ring ring; ///< the algebraic ring static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring operator with default values" ); - - - inline grb::RC pre_smooth( SmootherInputType& data ) { + inline grb::RC pre_smooth( SmootherInputType & data ) { return run_smoother( data, presmoother_steps ); } - inline grb::RC post_smooth( SmootherInputType& data ) { + inline grb::RC post_smooth( SmootherInputType & data ) { return run_smoother( data, postsmoother_steps ); } - inline grb::RC nonrecursive_smooth( SmootherInputType& data ) { + inline grb::RC nonrecursive_smooth( SmootherInputType 
& data ) { return run_smoother( data, non_recursive_smooth_steps ); } @@ -116,12 +113,12 @@ namespace grb { * smoother performs all smoothing steps the same way. */ grb::RC run_smoother( - SmootherInputType &data, + SmootherInputType & data, const size_t smoother_steps ) { RC ret = SUCCESS; - SmootherDataType &smoothing_info = *( levels.at( data.level ).get() ); + SmootherDataType & smoothing_info = *( levels.at( data.level ).get() ); data.sm_stopwatch.start(); for( size_t i = 0; i < smoother_steps && ret == SUCCESS; i++ ) { @@ -145,8 +142,8 @@ namespace grb { * unsuccessful operation otherwise */ grb::RC red_black_gauss_seidel_single_step( - SmootherInputType &data, - SmootherDataType &smoothing_info, + SmootherInputType & data, + SmootherDataType & smoothing_info, size_t color ) { const grb::Matrix< NonzeroType > & A = data.A; @@ -168,16 +165,16 @@ namespace grb { // z[mask] = r[mask] - smoother_temp[mask] + z[mask] .* diagonal[mask] // z[mask] = z[maks] ./ diagonal[mask] ret = ret ? ret : - grb::eWiseLambda( - [ &z, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { - // if the mask was properly initialized, the check on the mask value is unnecessary; - // if( color_mask[ i ] ) { - IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + z[ i ] * d; - z[ i ] = v / d; - // } - }, - color_mask, z, r, smoother_temp, A_diagonal ); + grb::eWiseLambda( + [ &z, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { + // if the mask was properly initialized, the check on the mask value is unnecessary; + // if( color_mask[ i ] ) { + IOType d = A_diagonal[ i ]; + IOType v = r[ i ] - smoother_temp[ i ] + z[ i ] * d; + z[ i ] = v / d; + // } + }, + color_mask, z, r, smoother_temp, A_diagonal ); assert( ret == SUCCESS ); return ret; } @@ -197,32 +194,31 @@ namespace grb { * unsuccessful operation otherwise */ grb::RC red_black_gauss_seidel( - SmootherInputType &data, - SmootherDataType &smoothing_info + SmootherInputType & data, + 
SmootherDataType & smoothing_info ) { RC ret = SUCCESS; // zero the temp output just once, assuming proper masking avoids // interference among different colors ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, - ring. template getZero< IOType >() ); + ring.template getZero< IOType >() ); // forward step for( size_t color = 0; color < smoothing_info.color_masks.size(); ++color ) { ret = red_black_gauss_seidel_single_step( data, smoothing_info, color ); } ret = ret ? ret : grb::set< descr >( smoothing_info.smoother_temp, - ring. template getZero< IOType >() ); + ring.template getZero< IOType >() ); // backward step for( size_t color = smoothing_info.color_masks.size(); color > 0; --color ) { ret = red_black_gauss_seidel_single_step( data, smoothing_info, color - 1 ); - } return ret; } }; - } // namespace algorithms + } // namespace algorithms } // namespace grb #endif // H_GRB_ALGORITHMS_RED_BLACK_GAUSS_SEIDEL diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index 0e2ee58af..4a19f9deb 100644 --- a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -24,8 +24,8 @@ #ifndef _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER #define _H_GRB_ALGORITHMS_HPCG_SINGLE_MATRIX_COARSENER -#include #include +#include #include @@ -52,7 +52,10 @@ namespace grb { * @param[in] _finer_size size of the finer system, i.e. size of external objects \b before coarsening * @param[in] coarser_size size of the current system, i.e. size \b after coarsening */ - CoarseningData( size_t _finer_size, size_t coarser_size ) : + CoarseningData( + size_t _finer_size, + size_t coarser_size + ) : coarsening_matrix( coarser_size, _finer_size ), Ax_finer( _finer_size ) {} @@ -89,8 +92,7 @@ namespace grb { /** * Data to coarsen each level, from finer to coarser. 
*/ - std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, - NonzeroType > > > coarsener_levels; + std::vector< std::unique_ptr< grb::algorithms::CoarseningData< IOType, NonzeroType > > > coarsener_levels; Ring ring; Minus minus; @@ -100,12 +102,12 @@ namespace grb { * \p coarser (the coarser system). */ inline grb::RC coarsen_residual( - const MultiGridInputType &finer, - MultiGridInputType &coarser + const MultiGridInputType & finer, + MultiGridInputType & coarser ) { // first compute the residual - CoarseningData< IOType, NonzeroType > &coarsener = *coarsener_levels[ finer.level ]; - grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring. template getZero< IOType >() ); + CoarseningData< IOType, NonzeroType > & coarsener = *coarsener_levels[ finer.level ]; + grb::RC ret = grb::set< descr >( coarsener.Ax_finer, ring.template getZero< IOType >() ); ret = ret ? ret : grb::mxv< descr >( coarsener.Ax_finer, finer.A, finer.z, ring ); return ret ? ret : compute_coarsening( finer.r, coarser.r, coarsener ); @@ -116,8 +118,8 @@ namespace grb { * into the finer solution. */ inline grb::RC prolong_solution( - const MultiGridInputType &coarser, - MultiGridInputType &finer + const MultiGridInputType & coarser, + MultiGridInputType & finer ) { return compute_prolongation( coarser.z, finer.z, *coarsener_levels[ finer.level ] ); } @@ -136,7 +138,7 @@ namespace grb { */ grb::RC compute_coarsening( const grb::Vector< IOType > & r_fine, // fine residual - grb::Vector< IOType > & r_coarse, // fine residual + grb::Vector< IOType > & r_coarse, // coarse residual CoarseningData< IOType, NonzeroType > & coarsening_data ) { RC ret = SUCCESS; @@ -171,13 +173,15 @@ namespace grb { RC ret = SUCCESS; // actual refining, from *coarsening_data->syztem_size == nrows(*coarsening_data->A) / 8 // to nrows(z_fine) - ret = ret ? ret : grb::set< descr >( coarsening_data.Ax_finer, ring.template getZero< IOType >() ); + ret = ret ? 
ret : grb::set< descr >( coarsening_data.Ax_finer, + ring.template getZero< IOType >() ); ret = ret ? ret : grb::mxv< descr | grb::descriptors::transpose_matrix >( coarsening_data.Ax_finer, coarsening_data.coarsening_matrix, z_coarse, ring ); assert( ret == SUCCESS ); - ret = ret ? ret : grb::foldl< descr >( z_fine, coarsening_data.Ax_finer, ring.getAdditiveMonoid() ); // z_fine += Ax_finer; + ret = ret ? ret : grb::foldl< descr >( z_fine, coarsening_data.Ax_finer, + ring.getAdditiveMonoid() ); // z_fine += Ax_finer; assert( ret == SUCCESS ); return ret; } diff --git a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp index 2c0383325..ebac6ca02 100644 --- a/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp +++ b/include/graphblas/utils/iterators/IteratorValueAdaptor.hpp @@ -56,7 +56,8 @@ namespace grb { static_assert( std::is_copy_assignable< AdaptorType >::value, "AdaptorType must be copy-assignable" ); - typedef typename std::decay< decltype( *std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) ) >::type value_type; + typedef typename std::decay< + decltype( *std::declval< AdaptorType >()( *std::declval< InnerIterType >() ) )>::type value_type; typedef value_type & reference; typedef value_type * pointer; typedef const value_type * const_pointer; @@ -96,8 +97,7 @@ namespace grb { * * @param _iter the underlying iterator, to be moved */ - IteratorValueAdaptor( - typename std::enable_if< std::is_default_constructible< AdaptorType >::value, + IteratorValueAdaptor( typename std::enable_if< std::is_default_constructible< AdaptorType >::value, InnerIterType && >::type _iter ) : iter( std::move( _iter ) ), diff --git a/include/graphblas/utils/multigrid/array_vector_storage.hpp b/include/graphblas/utils/multigrid/array_vector_storage.hpp index a40850f77..cfca1dda2 100644 --- a/include/graphblas/utils/multigrid/array_vector_storage.hpp +++ 
b/include/graphblas/utils/multigrid/array_vector_storage.hpp @@ -25,10 +25,10 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE #define _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE -#include -#include #include +#include #include +#include namespace grb { namespace utils { @@ -46,11 +46,10 @@ namespace grb { template< size_t DIMS, typename DataType - > class ArrayVectorStorage: public std::array< DataType, DIMS > { + > class ArrayVectorStorage : public std::array< DataType, DIMS > { public: - - using VectorStorageType = std::array< DataType, DIMS >&; - using ConstVectorStorageType = const std::array< DataType, DIMS >&; + using VectorStorageType = std::array< DataType, DIMS > &; + using ConstVectorStorageType = const std::array< DataType, DIMS > &; using SelfType = ArrayVectorStorage< DIMS, DataType >; /** @@ -62,25 +61,25 @@ namespace grb { ArrayVectorStorage( size_t _dimensions ) { static_assert( DIMS > 0, "cannot allocate 0-sized array" ); if( _dimensions != DIMS ) { - throw std::invalid_argument("given dimensions must match the type dimensions"); + throw std::invalid_argument( "given dimensions must match the type dimensions" ); } } ArrayVectorStorage() = delete; // only copy constructor/assignment, since there's no external storage - ArrayVectorStorage( const SelfType &o ) noexcept { + ArrayVectorStorage( const SelfType & o ) noexcept { std::copy_n( o.cbegin(), DIMS, this->begin() ); } - ArrayVectorStorage( SelfType &&o ) = delete; + ArrayVectorStorage( SelfType && o ) = delete; - SelfType& operator=( const SelfType &original ) noexcept { + SelfType & operator=( const SelfType & original ) noexcept { std::copy_n( original.begin(), DIMS, this->begin() ); return *this; } - SelfType & operator=( SelfType &&original ) = delete; + SelfType & operator=( SelfType && original ) = delete; /** * Returns the geometrical dimensions of this vector, i.e. 
of the @@ -106,7 +105,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_ARRAY_VECTOR_STORAGE diff --git a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp index 9e5b7f92e..fff89b6db 100644 --- a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp @@ -24,9 +24,8 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE #define _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE -#include -#include #include +#include namespace grb { namespace utils { @@ -43,7 +42,7 @@ namespace grb { template< typename DataType > class DynamicVectorStorage { size_t _dimensions; - DataType* _storage; + DataType * _storage; void clean() { if( this->_storage != nullptr ) { @@ -53,47 +52,46 @@ namespace grb { public: // iterator fields - using reference = DataType&; - using const_reference = const DataType&; - using iterator = DataType*; - using const_iterator = const DataType*; - using pointer = DataType*; - using const_pointer = const DataType*; - - using VectorStorageType = DataType*; - using ConstVectorStorageType = DataType*; + using reference = DataType &; + using const_reference = const DataType &; + using iterator = DataType *; + using const_iterator = const DataType *; + using pointer = DataType *; + using const_pointer = const DataType *; + + using VectorStorageType = DataType *; + using ConstVectorStorageType = DataType *; using SelfType = DynamicVectorStorage< DataType >; - DynamicVectorStorage( size_t __dimensions ): - _dimensions( __dimensions ) { + DynamicVectorStorage( size_t __dimensions ) : _dimensions( __dimensions ) { if( __dimensions == 0 ) { - throw std::invalid_argument("dimensions cannot be 0"); + throw std::invalid_argument( "dimensions cannot be 0" ); } this->_storage = new DataType[ __dimensions ]; } 
DynamicVectorStorage() = delete; - DynamicVectorStorage( const SelfType &o ): + DynamicVectorStorage( const SelfType & o ) : _dimensions( o._dimensions ), _storage( new DataType[ o._dimensions ] ) { std::copy_n( o._storage, o._dimensions, this->_storage ); } - DynamicVectorStorage( SelfType &&o ) = delete; + DynamicVectorStorage( SelfType && o ) = delete; - SelfType& operator=( const SelfType &original ) { + SelfType & operator=( const SelfType & original ) { if( original._dimensions != this->_dimensions ) { this->clean(); - this->_storage = new DataType[ original._dimensions]; + this->_storage = new DataType[ original._dimensions ]; } this->_dimensions = original._dimensions; std::copy_n( original._storage, original._dimensions, this->_storage ); return *this; } - SelfType& operator=( SelfType &&original ) = delete; + SelfType & operator=( SelfType && original ) = delete; ~DynamicVectorStorage() { this->clean(); @@ -136,7 +134,7 @@ namespace grb { } inline reference operator[]( size_t pos ) { - return *( this->_storage + pos); + return *( this->_storage + pos ); } inline const_reference operator[]( size_t pos ) const { @@ -145,7 +143,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_DYNAMIC_VECTOR_STORAGE diff --git a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp index 2404cdf00..e51d7d6df 100644 --- a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp +++ b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp @@ -37,8 +37,8 @@ #include "array_vector_storage.hpp" #include "linearized_halo_ndim_system.hpp" -#include "linearized_ndim_system.hpp" #include "linearized_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" namespace grb { namespace utils { @@ -67,11 +67,9 @@ namespace grb { typename CoordType, typename ValueType, typename 
ValueCallable - > - struct HaloMatrixGeneratorIterator { + > struct HaloMatrixGeneratorIterator { - static_assert( std::is_copy_constructible< ValueCallable >::value, - "ValueCallable must be copy-constructible" ); + static_assert( std::is_copy_constructible< ValueCallable >::value, "ValueCallable must be copy-constructible" ); using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; @@ -84,23 +82,26 @@ namespace grb { friend SelfType; HaloPoint( - const ValueCallable &value_producer, + const ValueCallable & value_producer, RowIndexType i, ColumnIndexType j ) noexcept : _value_producer( value_producer ), _i( i ), - _j( j ) - {} + _j( j ) {} HaloPoint( const HaloPoint & ) = default; HaloPoint & operator=( const HaloPoint & ) = default; - inline RowIndexType i() const { return _i; } - inline ColumnIndexType j() const { return _j; } + inline RowIndexType i() const { + return _i; + } + inline ColumnIndexType j() const { + return _j; + } inline ValueType v() const { - return _value_producer( _i, _j); + return _value_producer( _i, _j ); } private: @@ -126,8 +127,8 @@ namespace grb { * @param non_diag value to emit outside the diagonal */ HaloMatrixGeneratorIterator( - const LinearSystemType &system, - const ValueCallable &value_producer + const LinearSystemType & system, + const ValueCallable & value_producer ) noexcept : _val( value_producer, 0, 0 ), _lin_system( &system ), @@ -150,7 +151,7 @@ namespace grb { * @return HaloMatrixGeneratorIterator& \c this object, with the updated state */ SelfType & operator++() noexcept { - (void) ++_sys_iter; + (void)++_sys_iter; update_coords(); return *this; } @@ -161,7 +162,7 @@ namespace grb { return *this; } - difference_type operator-( const SelfType &other ) const { + difference_type operator-( const SelfType & other ) const { return this->_sys_iter - other._sys_iter; } @@ -172,7 +173,7 @@ namespace grb { * @return true of the row or the column is different between \p o and \c this * @return 
false if both row and column of \p o and \c this are equal */ - bool operator!=( const SelfType &o ) const { + bool operator!=( const SelfType & o ) const { return this->_sys_iter != o._sys_iter; } @@ -183,7 +184,7 @@ namespace grb { * @return true of the row or the column is different between \p o and \c this * @return false if both row and column of \p o and \c this are equal */ - bool operator==( const SelfType &o ) const { + bool operator==( const SelfType & o ) const { return ! operator!=( o ); } @@ -227,7 +228,7 @@ namespace grb { private: value_type _val; - const LinearSystemType *_lin_system; + const LinearSystemType * _lin_system; Iterator _sys_iter; void update_coords() { @@ -237,7 +238,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_HALO_MATRIX_GENRATOR_ITERATOR diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp index 62e4dcd4a..3a5047277 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp @@ -25,16 +25,15 @@ #define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR #include -#include #include #include -#include +#include #include -#include "linearized_ndim_system.hpp" #include "array_vector_storage.hpp" #include "linearized_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" namespace grb { namespace utils { @@ -100,10 +99,8 @@ namespace grb { * @tparam SizeType type of coordinates and of sizes (must be large enough to describe the size * of the system along each direction) */ - template< - size_t DIMS, - typename SizeType - > class LinearizedHaloNDimIterator { + template< size_t DIMS, typename SizeType > + class LinearizedHaloNDimIterator { using SystemType = LinearizedHaloNDimSystem< DIMS, SizeType >; using VectorType = 
ArrayVectorStorage< DIMS, SizeType >; @@ -120,14 +117,13 @@ namespace grb { */ struct HaloNDimElement { private: - // for linearization - const SystemType* _system; + const SystemType * _system; // for iteration VectorIteratorType _element_iter; // coordinates iterator - VectorType _neighbor; //the current neighbor + VectorType _neighbor; // the current neighbor SizeType _position; public: @@ -135,11 +131,11 @@ namespace grb { HaloNDimElement() = delete; - HaloNDimElement( const HaloNDimElement& ) = default; + HaloNDimElement( const HaloNDimElement & ) = default; - HaloNDimElement( HaloNDimElement&& ) = delete; + HaloNDimElement( HaloNDimElement && ) = delete; - HaloNDimElement( const SystemType& system ) noexcept : + HaloNDimElement( const SystemType & system ) noexcept : _system( &system ), _element_iter( system ), _neighbor( DIMS ), @@ -148,7 +144,7 @@ namespace grb { std::fill_n( this->_neighbor.begin(), DIMS, 0 ); } - HaloNDimElement& operator=( const HaloNDimElement& ) = default; + HaloNDimElement & operator=( const HaloNDimElement & ) = default; /** * Get the element as vector coordinates. @@ -189,8 +185,8 @@ namespace grb { // interface for std::random_access_iterator using iterator_category = std::random_access_iterator_tag; using value_type = HaloNDimElement; - using pointer = const HaloNDimElement*; - using reference = const HaloNDimElement&; + using pointer = const HaloNDimElement *; + using reference = const HaloNDimElement &; using difference_type = signed long; LinearizedHaloNDimIterator() = delete; @@ -203,7 +199,7 @@ namespace grb { * * IF \p system is not valid anymore, then also \c this is not. 
*/ - LinearizedHaloNDimIterator( const SystemType& system ) noexcept : + LinearizedHaloNDimIterator( const SystemType & system ) noexcept : _point( system ), _neighbors_subspace( DIMS, system.halo() + 1 ), _neighbors_start( DIMS ), @@ -217,7 +213,7 @@ namespace grb { SelfType & operator=( const SelfType & ) = default; - bool operator!=( const SelfType &other ) const { + bool operator!=( const SelfType & other ) const { return this->_point._position != other._point._position; // use linear coordinate } @@ -226,7 +222,7 @@ namespace grb { } pointer operator->() const { - return &(this->_point); + return &( this->_point ); } /** @@ -243,10 +239,10 @@ namespace grb { * Does \b not advance the element, which should be done manually via #next_element(). */ void next_neighbour() { - if( !has_more_neighbours() ) { - throw std::out_of_range("the current element has no more neighbors"); + if( ! has_more_neighbours() ) { + throw std::out_of_range( "the current element has no more neighbors" ); } - ++(this->_neighbor_iter); + ++( this->_neighbor_iter ); this->on_neighbor_iter_update(); this->_point._position++; } @@ -255,20 +251,19 @@ namespace grb { * Tells whether the system has more elements. */ bool has_more_elements() const { - return this->_point.get_element_linear() != (this->_point._system)->base_system_size(); + return this->_point.get_element_linear() != ( this->_point._system )->base_system_size(); } /** * Moves \c this to point to the next element, setting the neighbor as the first one. */ void next_element() { - if( !has_more_elements() ) { - throw std::out_of_range("the system has no more elements"); + if( ! 
has_more_elements() ) { + throw std::out_of_range( "the system has no more elements" ); } size_t num_neighbours = this->_neighbors_subspace.system_size(); - size_t neighbour_position_offset = - this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); - ++(this->_point._element_iter); + size_t neighbour_position_offset = this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); + ++( this->_point._element_iter ); this->on_element_advance(); this->_point._position -= neighbour_position_offset; this->_point._position += num_neighbours; @@ -278,9 +273,9 @@ namespace grb { * Moves \c this to point to the next neighbor, also advancing the element if needed. */ SelfType & operator++() noexcept { - ++(this->_neighbor_iter); - if( !has_more_neighbours() ) { - ++(this->_point._element_iter); + ++( this->_neighbor_iter ); + if( ! has_more_neighbours() ) { + ++( this->_point._element_iter ); this->on_element_advance(); } else { @@ -302,7 +297,7 @@ namespace grb { throw std::range_error( "neighbor linear value beyond system" ); } VectorType final_element( DIMS ); - size_t neighbor_index = (this->_point._system->neighbour_linear_to_element( final_position, final_element )); + size_t neighbor_index = ( this->_point._system->neighbour_linear_to_element( final_position, final_element ) ); this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); this->_point._position = final_position; @@ -323,9 +318,8 @@ namespace grb { * * It throws if the result cannot be stored as a difference_type variable. 
*/ - difference_type operator-( const SelfType &other ) const { - return grb::utils::compute_signed_distance< difference_type, SizeType >( - _point.get_position(), other._point.get_position() ); + difference_type operator-( const SelfType & other ) const { + return grb::utils::compute_signed_distance< difference_type, SizeType >( _point.get_position(), other._point.get_position() ); } /** @@ -333,7 +327,7 @@ namespace grb { * * The implementation depends on the logic of operator++. */ - static SelfType make_system_end_iterator( const SystemType& system ) { + static SelfType make_system_end_iterator( const SystemType & system ) { SelfType result( system ); // go to the very first point outside of space result._point._element_iter = VectorIteratorType::make_system_end_iterator( system ); @@ -355,8 +349,7 @@ namespace grb { */ inline void on_neighbor_iter_update() { for( size_t i = 0; i < DIMS; i++ ) { - this->_point._neighbor[i] = this->_neighbors_start[i] - + this->_neighbor_iter->get_position()[i]; + this->_point._neighbor[ i ] = this->_neighbors_start[ i ] + this->_neighbor_iter->get_position()[ i ]; } } @@ -367,11 +360,7 @@ namespace grb { void on_element_update() { // reset everything VectorType neighbors_range( DIMS ); - this->_point._system->compute_neighbors_range( - this->_point._element_iter->get_position(), - this->_neighbors_start, - neighbors_range - ); + this->_point._system->compute_neighbors_range( this->_point._element_iter->get_position(), this->_neighbors_start, neighbors_range ); // re-target _neighbors_subspace this->_neighbors_subspace.retarget( neighbors_range ); } @@ -391,7 +380,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_ITERATOR diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp index 400fdd3ab..1ebe04b73 100644 --- 
a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp @@ -24,20 +24,19 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM #define _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM -#include -#include #include #include #include +#include #ifdef _DEBUG #include #endif #include "array_vector_storage.hpp" #include "dynamic_vector_storage.hpp" -#include "ndim_vector.hpp" -#include "linearized_ndim_system.hpp" #include "linearized_halo_ndim_iterator.hpp" +#include "linearized_ndim_system.hpp" +#include "ndim_vector.hpp" namespace grb { namespace utils { @@ -84,7 +83,7 @@ namespace grb { template< size_t DIMS, typename SizeType - > class LinearizedHaloNDimSystem: + > class LinearizedHaloNDimSystem : public LinearizedNDimSystem< SizeType, ArrayVectorStorage< DIMS, SizeType > > { public: using VectorType = ArrayVectorStorage< DIMS, SizeType >; @@ -108,16 +107,16 @@ namespace grb { _halo( halo ) { for( SizeType __size : sizes ) { - if ( __size < halo + 1 ) { + if( __size < halo + 1 ) { throw std::invalid_argument( - std::string( "the halo (" + std::to_string(halo) + - std::string( ") goes beyond a system size (" ) + - std::to_string( __size) + std::string( ")" ) ) ); + std::string( "the halo (" + std::to_string( halo ) + + std::string( ") goes beyond a system size (" ) + + std::to_string( __size ) + std::string( ")" ) ) ); } } - this->_system_size = init_neigh_to_base_search( this->get_sizes(), - _halo, this->_dimension_limits ); + this->_system_size = init_neigh_to_base_search( + this->get_sizes(), _halo, this->_dimension_limits ); assert( this->_dimension_limits.size() == DIMS ); } @@ -181,16 +180,12 @@ namespace grb { * otherwise (on corner, edge, or face). 
*/ void compute_neighbors_range( - const VectorType &element_coordinates, - VectorType &neighbors_start, - VectorType &neighbors_range + const VectorType & element_coordinates, + VectorType & neighbors_start, + VectorType & neighbors_range ) const noexcept { compute_first_neigh_and_range( this->get_sizes(), - this->_halo, - element_coordinates, - neighbors_start, - neighbors_range - ); + this->_halo, element_coordinates, neighbors_start, neighbors_range ); } /** @@ -206,9 +201,9 @@ namespace grb { * \a 0<=iget_sizes(), this->_system_size, this->_dimension_limits, this->_halo, neighbor_linear, base_element_vector ); @@ -216,7 +211,8 @@ namespace grb { private: const SizeType _halo; - std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > _dimension_limits; + std::vector< NDimVector< SizeType, SizeType, + DynamicVectorStorage< SizeType > > > _dimension_limits; size_t _system_size; /** @@ -242,12 +238,12 @@ namespace grb { * @return size_t the total number of neighbors for this configuration and this dimension */ static size_t accumulate_dimension_neighbours( - const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > &prev_neighs, - SizeType* coords_buffer, + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & prev_neighs, + SizeType * coords_buffer, size_t halo, size_t local_size ) { - size_t neighs =0; + size_t neighs = 0; size_t h = 0; for( ; h < halo && local_size > 1; h++ ) { *coords_buffer = h; @@ -278,14 +274,14 @@ namespace grb { */ static void compute_dim0_neighbors( size_t halo, - NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >& config_neighbors + NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & config_neighbors ) { - using it_type = typename NDimVector< SizeType, SizeType, - DynamicVectorStorage< SizeType > >::DomainIterator; + using it_type = typename NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >::DomainIterator; it_type end = 
config_neighbors.domain_end(); for( it_type it = config_neighbors.domain_begin(); it != end; ++it ) { size_t res = 1; - for( size_t h: it->get_position() ) res *= (h + 1 + halo); + for( size_t h : it->get_position() ) + res *= ( h + 1 + halo ); config_neighbors.at( it->get_position() ) = res; } } @@ -328,42 +324,43 @@ namespace grb { * @return size_t the number of neighbors of the entire system */ static size_t init_neigh_to_base_search( - typename LinearizedNDimSystem< SizeType, - ArrayVectorStorage< DIMS, SizeType > >::ConstVectorReference - sizes, + typename LinearizedNDimSystem< SizeType, ArrayVectorStorage< DIMS, SizeType > + >::ConstVectorReference sizes, size_t halo, - std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > >& dimension_limits + std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > & dimension_limits ) { using nd_vec = NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >; using nd_vec_iterator = typename nd_vec::DomainIterator; - std::vector halo_sizes( DIMS, halo + 1); - dimension_limits.emplace_back(halo_sizes); + std::vector< size_t > halo_sizes( DIMS, halo + 1 ); + dimension_limits.emplace_back( halo_sizes ); // initialize values - compute_dim0_neighbors( halo, dimension_limits[0] ); + compute_dim0_neighbors( halo, dimension_limits[ 0 ] ); for( size_t i = 1; i < DIMS; i++ ) { - std::vector halos( DIMS - i, halo + 1 ); - dimension_limits.emplace_back(halos); + std::vector< size_t > halos( DIMS - i, halo + 1 ); + dimension_limits.emplace_back( halos ); } std::array< SizeType, DIMS > prev_coords_buffer; // store at most DIMS values - SizeType* const prev_coords = prev_coords_buffer.data(); - SizeType* const second = prev_coords + 1; // store previous coordinates from second position + SizeType * const prev_coords = prev_coords_buffer.data(); + SizeType * const second = prev_coords + 1; // store previous coordinates from second position for( size_t dimension = 1; dimension < 
DIMS; dimension++ ) { - const nd_vec& prev_neighs{dimension_limits[dimension - 1]}; - nd_vec& current_neighs{dimension_limits[dimension]}; + const nd_vec & prev_neighs { dimension_limits[ dimension - 1 ] }; + nd_vec & current_neighs { dimension_limits[ dimension ] }; nd_vec_iterator end = current_neighs.domain_end(); for( nd_vec_iterator it = current_neighs.domain_begin(); it != end; ++it ) { typename nd_vec::ConstDomainVectorReference current_halo_coords = it->get_position(); std::copy( it->get_position().cbegin(), it->get_position().cend(), second ); - size_t local_size = sizes[dimension - 1]; - const size_t neighs = accumulate_dimension_neighbours(prev_neighs, prev_coords, halo, local_size); - current_neighs.at(current_halo_coords) = neighs; + size_t local_size = sizes[ dimension - 1 ]; + const size_t neighs = accumulate_dimension_neighbours( prev_neighs, + prev_coords, halo, local_size ); + current_neighs.at( current_halo_coords ) = neighs; } } - return accumulate_dimension_neighbours( dimension_limits[DIMS - 1], prev_coords, halo, sizes.back() ); + return accumulate_dimension_neighbours( dimension_limits[ DIMS - 1 ], + prev_coords, halo, sizes.back() ); } /** @@ -382,22 +379,24 @@ namespace grb { * @param[out] neighbors_range stores the range of neighbors around \p element_coordinates */ static void compute_first_neigh_and_range( - const ArrayVectorStorage< DIMS, SizeType > &_system_sizes, + const ArrayVectorStorage< DIMS, SizeType > & _system_sizes, const SizeType halo, - const ArrayVectorStorage< DIMS, SizeType > &element_coordinates, - ArrayVectorStorage< DIMS, SizeType > &neighbors_start, - ArrayVectorStorage< DIMS, SizeType > &neighbors_range + const ArrayVectorStorage< DIMS, SizeType > & element_coordinates, + ArrayVectorStorage< DIMS, SizeType > & neighbors_start, + ArrayVectorStorage< DIMS, SizeType > & neighbors_range ) { - for( SizeType i = 0; i < DIMS/* - 1*/; i++ ) { - const SizeType start = element_coordinates[i] <= halo ? 
0 : element_coordinates[i] - halo; - const SizeType end = std::min( element_coordinates[i] + halo, _system_sizes[i] - 1 ); - neighbors_start[i] = start; - neighbors_range[i] = end - start + 1; + for( SizeType i = 0; i < DIMS /* - 1*/; i++ ) { + const SizeType start = element_coordinates[ i ] <= halo ? 0 : + element_coordinates[ i ] - halo; + const SizeType end = std::min( element_coordinates[ i ] + halo, _system_sizes[ i ] - 1 ); + neighbors_start[ i ] = start; + neighbors_range[ i ] = end - start + 1; } } #ifdef _DEBUG - template< typename IterType > static std::ostream & print_sequence( IterType begin, IterType end ) { + template< typename IterType > + static std::ostream & print_sequence( IterType begin, IterType end ) { for( ; begin != end; ++begin ) { std::cout << *begin << ' '; } @@ -423,17 +422,17 @@ namespace grb { * @return size_t the index of the neighbor within the element's neighbors */ static size_t map_neigh_to_base_and_index( - const std::array< SizeType, DIMS > &sizes, + const std::array< SizeType, DIMS > & sizes, size_t system_size, - const std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > > - &neighbors_per_dimension, + const std::vector< NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > + > & neighbors_per_dimension, SizeType halo, SizeType neighbor_linear, - ArrayVectorStorage< DIMS, SizeType > &element_vector - ){ + ArrayVectorStorage< DIMS, SizeType > & element_vector + ) { if( neighbor_linear > system_size ) { throw std::invalid_argument( "neighbor number ( " + std::to_string( neighbor_linear ) - + " ) >= system size ( " + std::to_string( system_size ) + " )"); + + " ) >= system size ( " + std::to_string( system_size ) + " )" ); } ArrayVectorStorage< DIMS, SizeType > configuration( DIMS ); #ifdef _DEBUG @@ -441,8 +440,7 @@ namespace grb { #endif std::fill_n( configuration.begin(), DIMS, 0 ); - for( size_t _dim = DIMS; _dim > 0; _dim--) { - + for( size_t _dim = DIMS; _dim > 0; _dim-- ) { // each 
iteration looks for the base element along a dimension via the number of neighbors // each element has: once previous_neighs reaches neighbor_linear, the corresponding // base element is found; if the control reaches the end, this means it must explore @@ -453,21 +451,20 @@ namespace grb { // start from highest dimension const size_t dimension = _dim - 1; // how many elements along this dimension - const size_t dimension_size = sizes[dimension]; + const size_t dimension_size = sizes[ dimension ]; // configurations of neighbors along this dimension // (e.g., corner, edge; or edge, inner element) - const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > - & neighbors = neighbors_per_dimension[dimension]; + const NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & neighbors = + neighbors_per_dimension[ dimension ]; // coordinate to modify to identify each configuration SizeType * const halo_coords_begin = configuration.data() + dimension; #ifdef _DEBUG - std::cout << "DIMENSION " << dimension << std::endl - << "- setup - neighbour " << neighbor_linear << std::endl - << "\thalo : "; + std::cout << "DIMENSION " << dimension << std::endl << "- setup - neighbour " + << neighbor_linear << std::endl << "\thalo : "; print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; #endif - size_t h =0; // configuration type along this dimension + size_t h = 0; // configuration type along this dimension size_t previous_neighs = 0; *halo_coords_begin = h; // account for neighbors in the first elements along the dimension, within halo distance: @@ -481,16 +478,14 @@ namespace grb { halo_max_neighs = neighbors.at( halo_coords_begin ); } #ifdef _DEBUG - std::cout << "- initial halo - neighbour " << neighbor_linear << std::endl - << "\th " << h << std::endl - << "\thalo : "; + std::cout << "- initial halo - neighbour " << neighbor_linear << std::endl << "\th " << h << std::endl << "\thalo : "; print_sequence( halo_coords_begin, halo_coords_end ) 
<< std::endl; std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; #endif - if ( h < halo ){ + if( h < halo ) { // we have already counted enough neighbors: neighbor_linear is thus a neighbor // of one of the first (< halo) elements along this dimension: go to next dimension - element_vector[dimension] = h; + element_vector[ dimension ] = h; neighbor_linear -= previous_neighs; #ifdef _DEBUG std::cout << "end neighbour " << neighbor_linear << std::endl; @@ -504,17 +499,17 @@ namespace grb { const size_t distance_from_halo = ( neighbor_linear - previous_neighs ) / halo_max_neighs; #ifdef _DEBUG std::cout << "- before middle elements - neighbour " << neighbor_linear << std::endl - << "\tprevious_neighs " << previous_neighs << std::endl - << "\thalo_max_neighs " << halo_max_neighs << std::endl - << "\tdistance_from_halo " << distance_from_halo << std::endl - << "\tdimension_size " << dimension_size << std::endl; + << "\tprevious_neighs " << previous_neighs << std::endl + << "\thalo_max_neighs " << halo_max_neighs << std::endl + << "\tdistance_from_halo " << distance_from_halo << std::endl + << "\tdimension_size " << dimension_size << std::endl; #endif - if ( distance_from_halo < dimension_size - 2 * halo ) { + if( distance_from_halo < dimension_size - 2 * halo ) { // the base element is one of the internal elements along this dimension: // hence return its diatance from the halo + the halo itself (= distance from // beginning of the space) - element_vector[dimension] = distance_from_halo + halo; - neighbor_linear -= (previous_neighs + distance_from_halo * halo_max_neighs) ; + element_vector[ dimension ] = distance_from_halo + halo; + neighbor_linear -= ( previous_neighs + distance_from_halo * halo_max_neighs ); #ifdef _DEBUG std::cout << "end neighbour " << neighbor_linear << std::endl; #endif @@ -546,18 +541,17 @@ namespace grb { #endif // ( dimension_size - 1 ) because coordinates are 0-based and neighbor // is "inside" range [ previous_neighs, 
previous_neighs + halo_max_neighs ] - element_vector[dimension] = dimension_size - 1 - h; + element_vector[ dimension ] = dimension_size - 1 - h; #ifdef _DEBUG std::cout << "end neighbour " << neighbor_linear << std::endl; #endif } return neighbor_linear; } - }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_LINEARIZED_HALO_NDIM_SYSTEM diff --git a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp index 199d08926..9b0e61a8a 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp @@ -24,12 +24,11 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR -#include #include +#include +#include #include #include -#include -#include #include @@ -63,7 +62,7 @@ namespace grb { public: using VectorType = InternalVectorType; using LinNDimSysType = LinearizedNDimSystem< SizeType, VectorType >; - using ConstVectorReference = const VectorType&; + using ConstVectorReference = const VectorType &; using SelfType = LinearizedNDimIterator< SizeType, InternalVectorType >; /** @@ -73,7 +72,7 @@ namespace grb { */ struct NDimPoint { private: - const LinNDimSysType* system; // pointer because of copy assignment + const LinNDimSysType * system; // pointer because of copy assignment VectorType coords; public: @@ -81,18 +80,15 @@ namespace grb { NDimPoint() = delete; - NDimPoint( const NDimPoint& ) = default; + NDimPoint( const NDimPoint & ) = default; - NDimPoint( NDimPoint&& ) = delete; + NDimPoint( NDimPoint && ) = delete; - NDimPoint( const LinNDimSysType& _system ) noexcept : - system( &_system ), - coords( _system.dimensions() ) - { + NDimPoint( const LinNDimSysType & _system ) noexcept : system( &_system ), coords( _system.dimensions() ) { std::fill_n( this->coords.begin(), 
_system.dimensions(), 0 ); } - NDimPoint& operator=( const NDimPoint& ) = default; + NDimPoint & operator=( const NDimPoint & ) = default; inline ConstVectorReference get_position() const { return coords; @@ -106,8 +102,8 @@ namespace grb { // interface for std::random_access_iterator using iterator_category = std::random_access_iterator_tag; using value_type = NDimPoint; - using pointer = const value_type*; - using reference = const value_type&; + using pointer = const value_type *; + using reference = const value_type &; using difference_type = signed long; /** @@ -118,9 +114,7 @@ namespace grb { * If \p _system is not a valid object anymore, all iterators created from it are also * not valid. */ - LinearizedNDimIterator( const LinNDimSysType &_system ) noexcept : - _p( _system ) - {} + LinearizedNDimIterator( const LinNDimSysType & _system ) noexcept : _p( _system ) {} /** * Construct a new LinearizedNDimIterator object from the original LinNDimSysType @@ -132,7 +126,8 @@ namespace grb { * not valid. 
*/ template< typename IterT > LinearizedNDimIterator( - const LinNDimSysType &_system, IterT begin + const LinNDimSysType & _system, + IterT begin ) noexcept : _p( _system ) { @@ -141,10 +136,9 @@ namespace grb { LinearizedNDimIterator() = delete; - LinearizedNDimIterator( const SelfType &original ): - _p( original._p ) {} + LinearizedNDimIterator( const SelfType & original ) : _p( original._p ) {} - SelfType& operator=( const SelfType &original ) = default; + SelfType & operator=( const SelfType & original ) = default; ~LinearizedNDimIterator() {} @@ -156,7 +150,7 @@ namespace grb { bool rewind = true; // rewind only the first N-1 coordinates for( size_t i = 0; i < this->_p.system->dimensions() - 1 && rewind; i++ ) { - SizeType& coord = this->_p.coords[ i ]; + SizeType & coord = this->_p.coords[ i ]; // must rewind dimension if we wrap-around SizeType plus = coord + 1; rewind = plus >= this->_p.system->get_sizes()[ i ]; @@ -180,7 +174,7 @@ namespace grb { SelfType & operator+=( size_t offset ) { size_t linear = _p.get_linear_position() + offset; if( linear > _p.system->system_size() ) { - throw std::invalid_argument("increment is too large"); + throw std::invalid_argument( "increment is too large" ); } if( offset == 1 ) { return operator++(); @@ -194,10 +188,9 @@ namespace grb { * * It throws if the result cannot be stored as a difference_type variable. 
*/ - difference_type operator-( const SelfType &other ) const { + difference_type operator-( const SelfType & other ) const { return grb::utils::compute_signed_distance< difference_type, SizeType >( _p.get_linear_position(), other._p.get_linear_position() ); - } reference operator*() const { @@ -208,16 +201,16 @@ namespace grb { return &( this->_p ); } - bool operator!=( const SelfType &o ) const { + bool operator!=( const SelfType & o ) const { const size_t dims = this->_p.system->dimensions(); if( dims != o._p.system->dimensions() ) { - throw std::invalid_argument("system sizes do not match"); + throw std::invalid_argument( "system sizes do not match" ); } bool equal = true; - for( size_t i =0; i < dims && equal; i++) { - equal &= ( this->_p.coords[i] == o._p.coords[i] ); + for( size_t i = 0; i < dims && equal; i++ ) { + equal &= ( this->_p.coords[ i ] == o._p.coords[ i ] ); } - return !equal; + return ! equal; } /** @@ -225,7 +218,7 @@ namespace grb { * * Its implementation depending on the logic in operator++. 
*/ - static SelfType make_system_end_iterator( const LinNDimSysType &_system ) { + static SelfType make_system_end_iterator( const LinNDimSysType & _system ) { // fill with 0s SelfType iter( _system ); size_t last = iter->system->dimensions() - 1; @@ -239,7 +232,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_ITERATOR diff --git a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp index 7b3c94341..a02a0c631 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp @@ -24,17 +24,16 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER -#include #include -#include -#include -#include #include -#include #include +#include +#include +#include +#include -#include "ndim_system.hpp" #include "linearized_ndim_iterator.hpp" +#include "ndim_system.hpp" namespace grb { namespace utils { @@ -60,9 +59,9 @@ namespace grb { template< typename SizeType, typename InternalVectorType - > class LinearizedNDimSystem: public NDimSystem< SizeType, InternalVectorType > { + > class LinearizedNDimSystem : public NDimSystem< SizeType, InternalVectorType > { public: - static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); + static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type" ); using BaseType = NDimSystem< SizeType, InternalVectorType >; using SelfType = LinearizedNDimSystem< SizeType, InternalVectorType >; @@ -78,32 +77,39 @@ namespace grb { * where each iterator's position stores the size along each dimension; example: * *begin is the size along dimension 0, *(++begin) is the size along dimension 1 ... 
*/ - template< typename IterT > LinearizedNDimSystem( IterT begin, IterT end) noexcept : + template< typename IterT > + LinearizedNDimSystem( + IterT begin, + IterT end + ) noexcept : BaseType( begin, end ), _offsets( std::distance( begin, end ) ) { - this->_system_size = compute_range_product( begin, end, this->_offsets.begin() ) ; + this->_system_size = compute_range_product( begin, end, this->_offsets.begin() ); } /** * Construct a new LinearizedNDimSystem object with dimensions \p _sizes.size() * and sizes stored in \p _sizes. */ - LinearizedNDimSystem( const std::vector< size_t > &_sizes ) noexcept : + LinearizedNDimSystem( const std::vector< size_t > & _sizes ) noexcept : LinearizedNDimSystem( _sizes.cbegin(), _sizes.cend() ) {} /** * Construct a new LinearizedNDimSystem object with \p _dimensions dimensions * and sizes all equal to \p max_value. */ - LinearizedNDimSystem( size_t _dimensions, size_t _size ) noexcept : + LinearizedNDimSystem( + size_t _dimensions, + size_t _size + ) noexcept : BaseType( _dimensions, _size ), _offsets( _dimensions ), _system_size( _dimensions ) { SizeType v = 1; - for( size_t i =0; i < _dimensions; i++ ) { - this->_offsets[i] = v; + for( size_t i = 0; i < _dimensions; i++ ) { + this->_offsets[ i ] = v; v *= _size; } this->_system_size = v; @@ -111,19 +117,21 @@ namespace grb { LinearizedNDimSystem() = delete; - LinearizedNDimSystem( const SelfType &original ) = default; + LinearizedNDimSystem( const SelfType & original ) = default; - LinearizedNDimSystem( SelfType &&original ) noexcept: - BaseType( std::move(original) ), _offsets( std::move( original._offsets ) ), - _system_size( original._system_size ) { - original._system_size = 0; + LinearizedNDimSystem( SelfType && original ) noexcept : + BaseType( std::move( original ) ), + _offsets( std::move( original._offsets ) ), + _system_size( original._system_size ) + { + original._system_size = 0; } ~LinearizedNDimSystem() {} - SelfType& operator=( const SelfType & ) = default; + 
SelfType & operator=( const SelfType & ) = default; - SelfType& operator=( SelfType &&original ) = delete; + SelfType & operator=( SelfType && original ) = delete; /** * Computes the size of the system, i.e. its number of elements; @@ -147,15 +155,18 @@ namespace grb { * @param[in] linear linear index * @param[out] output output vector \p linear corresponds to */ - void linear_to_ndim( size_t linear, VectorReference output ) const { + void linear_to_ndim( + size_t linear, + VectorReference output + ) const { if( linear > this->_system_size ) { throw std::range_error( "linear value beyond system" ); } for( size_t _i = this->_offsets.dimensions(); _i > 0; _i-- ) { const size_t dim = _i - 1; - const size_t coord = linear / this->_offsets[dim]; - output[dim] = coord; - linear -= ( coord * this->_offsets[dim] ); + const size_t coord = linear / this->_offsets[ dim ]; + output[ dim ] = coord; + linear -= ( coord * this->_offsets[ dim ] ); } assert( linear == 0 ); } @@ -165,7 +176,7 @@ namespace grb { * a const reference to \p InternalVectorType and checks whether each value in the input * vector \p ndim_vector is within the system sizes (otherwise it throws). */ - size_t ndim_to_linear_check( ConstVectorReference ndim_vector) const { + size_t ndim_to_linear_check( ConstVectorReference ndim_vector ) const { return this->ndim_to_linear_check( ndim_vector.storage() ); } @@ -178,7 +189,7 @@ namespace grb { size_t ndim_to_linear_check( ConstVectorStorageType ndim_vector ) const { size_t linear = 0; for( size_t i = 0; i < this->dimensions(); i++ ) { - if( ndim_vector[i] >= this->get_sizes()[i] ) { + if( ndim_vector[ i ] >= this->get_sizes()[ i ] ) { throw std::invalid_argument( "input vector beyond system sizes" ); } } @@ -190,7 +201,7 @@ namespace grb { * a const reference to \p InternalVectorType but does not check whether each value in the input * vector \p ndim_vector is within the system sizes. 
*/ - size_t ndim_to_linear( ConstVectorReference ndim_vector) const { + size_t ndim_to_linear( ConstVectorReference ndim_vector ) const { return this->ndim_to_linear( ndim_vector.storage() ); } @@ -202,7 +213,7 @@ namespace grb { size_t ndim_to_linear( ConstVectorStorageType ndim_vector ) const { size_t linear = 0; for( size_t i = 0; i < this->dimensions(); i++ ) { - linear += this->_offsets[i] * ndim_vector[i]; + linear += this->_offsets[ i ] * ndim_vector[ i ]; } return linear; } @@ -215,12 +226,13 @@ namespace grb { */ void retarget( ConstVectorReference _new_sizes ) { if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { - throw std::invalid_argument("new system must have same dimensions as previous: new " - + std::to_string( _new_sizes.dimensions() ) + ", old " - + std::to_string( this->_sizes.dimensions() ) ); + throw std::invalid_argument( + "new system must have same dimensions as previous: new " + std::to_string( _new_sizes.dimensions() ) + + ", old " + std::to_string( this->_sizes.dimensions() ) ); } this->_sizes = _new_sizes; // copy - this->_system_size = compute_range_product( _new_sizes.begin(), _new_sizes.end(), this->_offsets.begin() ) ; + this->_system_size = compute_range_product( _new_sizes.begin(), _new_sizes.end(), + this->_offsets.begin() ); } /** @@ -254,7 +266,11 @@ namespace grb { template< typename IterIn, typename IterOut - > static size_t compute_range_product( IterIn in_begin, IterIn in_end, IterOut out_begin ) { + > static size_t compute_range_product( + IterIn in_begin, + IterIn in_end, + IterOut out_begin + ) { size_t prod = 1; for( ; in_begin != in_end; ++in_begin, ++out_begin ) { *out_begin = prod; @@ -265,7 +281,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM_LINEARIZER diff --git a/include/graphblas/utils/multigrid/ndim_system.hpp b/include/graphblas/utils/multigrid/ndim_system.hpp index f184a7042..5df62ace2 
100644 --- a/include/graphblas/utils/multigrid/ndim_system.hpp +++ b/include/graphblas/utils/multigrid/ndim_system.hpp @@ -24,11 +24,10 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM -#include #include -#include -#include #include +#include +#include namespace grb { namespace utils { @@ -48,11 +47,11 @@ namespace grb { typename InternalVectorType > class NDimSystem { public: - static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type"); + static_assert( std::is_integral< SizeType >::value, "SizeType must be an integral type" ); using VectorType = InternalVectorType; - using VectorReference = VectorType&; - using ConstVectorReference = const VectorType&; + using VectorReference = VectorType &; + using ConstVectorReference = const VectorType &; using SelfType = NDimSystem< SizeType, InternalVectorType >; /** @@ -66,9 +65,8 @@ namespace grb { * @param begin range begin * @param end end of range */ - template< typename IterType > NDimSystem( IterType begin, IterType end) noexcept : - _sizes( std::distance( begin, end ) ) - { + template< typename IterType > + NDimSystem( IterType begin, IterType end ) noexcept : _sizes( std::distance( begin, end ) ) { std::copy( begin, end, this->_sizes.begin() ); } @@ -76,16 +74,14 @@ namespace grb { * Construct a new NDimSystem object from an std::vector<>, taking its values * as system sizes and its length as number of dimensions. 
*/ - NDimSystem( const std::vector< size_t > &_sizes ) noexcept : + NDimSystem( const std::vector< size_t > & _sizes ) noexcept : SelfType( _sizes.cbegin(), _sizes.cend() ) {} /** * Construct a new NDimSystem object of dimensions \p dimensions * and with all sizes initialized to \p max_size */ - NDimSystem( size_t _dimensions, size_t max_size ) noexcept : - _sizes( _dimensions ) - { + NDimSystem( size_t _dimensions, size_t max_size ) noexcept : _sizes( _dimensions ) { std::fill_n( this->_sizes.begin(), _dimensions, max_size ); } @@ -95,9 +91,9 @@ namespace grb { NDimSystem( SelfType && ) = delete; - SelfType & operator=( const SelfType &original ) = default; + SelfType & operator=( const SelfType & original ) = default; - SelfType & operator=( SelfType &&original ) = delete; + SelfType & operator=( SelfType && original ) = delete; inline size_t dimensions() const noexcept { return _sizes.dimensions(); @@ -116,7 +112,7 @@ namespace grb { }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_SYSTEM diff --git a/include/graphblas/utils/multigrid/ndim_vector.hpp b/include/graphblas/utils/multigrid/ndim_vector.hpp index 7992f23f6..5a3ef4144 100644 --- a/include/graphblas/utils/multigrid/ndim_vector.hpp +++ b/include/graphblas/utils/multigrid/ndim_vector.hpp @@ -24,11 +24,11 @@ #ifndef _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR #define _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR +#include +#include +#include #include #include -#include -#include -#include #include "linearized_ndim_system.hpp" @@ -64,8 +64,8 @@ namespace grb { "the stored type is not default constructible" ); static_assert( std::is_integral< SizeType >::value, "SizeType must be integral" ); - using ConstDomainVectorReference = - typename LinearizedNDimSystem< SizeType, InternalVectorType >::ConstVectorReference; + using ConstDomainVectorReference = typename LinearizedNDimSystem< SizeType, + InternalVectorType 
>::ConstVectorReference; using ConstDomainVectorStorageType = typename InternalVectorType::ConstVectorStorageType; using DomainIterator = typename LinearizedNDimSystem< SizeType, InternalVectorType >::Iterator; using Selftype = NDimVector< DataType, SizeType, InternalVectorType >; @@ -77,9 +77,10 @@ namespace grb { * and number of dimensions equal to the range distance; the data values are * \b not initialized. */ - template< typename IterT > NDimVector( IterT begin, IterT end) : - _linearizer( begin, end ) - { + template< typename IterT > NDimVector( + IterT begin, + IterT end + ) : _linearizer( begin, end ) { this->data = new DataType[ _linearizer.system_size() ]; } @@ -88,26 +89,25 @@ namespace grb { * and number of dimensions equal to \p _sizes.size(); the data values are * \b not initialized. */ - NDimVector( const std::vector< size_t > &_sizes ) : + NDimVector( const std::vector< size_t > & _sizes ) : NDimVector( _sizes.cbegin(), _sizes.cend() ) {} - NDimVector( const Selftype& original ): + NDimVector( const Selftype & original ) : _linearizer( original._linearizer ), - data( new DataType[ original.data_size() ] ) + data( new DataType[ original.data_size() ] ) { std::copy_n( original.data, original.data_size(), this->data ); } - NDimVector( Selftype&& original ) noexcept: - _linearizer( std::move( original._linearizer ) ) - { + NDimVector( Selftype && original ) noexcept : + _linearizer( std::move( original._linearizer ) ) { this->data = original.data; original.data = nullptr; } - Selftype& operator=( const Selftype &original ) = delete; + Selftype & operator=( const Selftype & original ) = delete; - Selftype& operator=( Selftype &&original ) = delete; + Selftype & operator=( Selftype && original ) = delete; ~NDimVector() { this->clean_mem(); @@ -131,7 +131,7 @@ namespace grb { * Access the data element at N-dimension coordinate given by the iterable * \p coordinates. 
*/ - inline DataType& at( ConstDomainVectorReference coordinates ) { + inline DataType & at( ConstDomainVectorReference coordinates ) { return this->data[ this->get_coordinate( coordinates.storage() ) ]; } @@ -139,7 +139,7 @@ namespace grb { * Const-access the data element at N-dimension coordinate given by the iterable * \p coordinates. */ - inline const DataType& at( ConstDomainVectorReference coordinates ) const { + inline const DataType & at( ConstDomainVectorReference coordinates ) const { return this->data[ this->get_coordinate( coordinates.storage() ) ]; } @@ -147,7 +147,7 @@ namespace grb { * Access the data element at N-dimension coordinate given by the vector * storage object \p coordinates. */ - inline DataType& at( ConstDomainVectorStorageType coordinates ) { + inline DataType & at( ConstDomainVectorStorageType coordinates ) { return this->data[ this->get_coordinate( coordinates ) ]; } @@ -155,7 +155,7 @@ namespace grb { * Const-access the data element at N-dimension coordinate given by the vector * storage object \p coordinates. 
*/ - inline const DataType& at( ConstDomainVectorStorageType coordinates ) const { + inline const DataType & at( ConstDomainVectorStorageType coordinates ) const { return this->data[ this->get_coordinate( coordinates ) ]; } @@ -177,7 +177,7 @@ namespace grb { private: const LinearizedNDimSystem< SizeType, InternalVectorType > _linearizer; - DataType* data; + DataType * data; inline size_t get_coordinate( ConstDomainVectorStorageType coordinates ) const { return this->_linearizer.ndim_to_linear( coordinates ); @@ -188,14 +188,14 @@ namespace grb { } void clean_mem() { - if ( this->data == nullptr ) { + if( this->data == nullptr ) { delete[] this->data; } } }; } // namespace multigrid - } // namespace utils + } // namespace utils } // namespace grb #endif // _H_GRB_ALGORITHMS_MULTIGRID_NDIM_VECTOR diff --git a/include/graphblas/utils/telemetry/CSVWriter.hpp b/include/graphblas/utils/telemetry/CSVWriter.hpp index 94a7111b6..d92d5efd1 100644 --- a/include/graphblas/utils/telemetry/CSVWriter.hpp +++ b/include/graphblas/utils/telemetry/CSVWriter.hpp @@ -15,9 +15,11 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 14th February, 2023 +/** + * @file CSVWriter.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the CSVWriter class. */ #ifndef _H_GRB_UTILS_TELEMETRY_CSV_WRITER @@ -39,87 +41,54 @@ namespace grb { namespace utils { namespace telemetry { + /// standard CSV separator static constexpr char STD_CSV_SEP = ','; - template< typename TelControllerType, bool enabled, class T1, class... Ts > - class CSVWriter : public TelemetryBase< TelControllerType, enabled > { - public: - template< class U, class... Us > - struct is_csv_printable { - static constexpr bool value = std::is_arithmetic< U >::value; - }; - - template< class U1, class U2, class... Us > - struct is_csv_printable< U1, U2, Us... > { - static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... 
>::value; - }; - - static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); - - using self_t = CSVWriter< TelControllerType, enabled, T1, Ts... >; - - using base_t = TelemetryBase< TelControllerType, enabled >; - - CSVWriter() = delete; - - CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers, char _separator, size_t size ) : base_t( tt ) { - (void)tt; - (void)_headers; - (void)_separator; - (void)size; - } - - CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} - - CSVWriter( const self_t & ) = delete; - - CSVWriter( self_t && ) = delete; - - self_t & operator=( const self_t & ) = delete; - - self_t & operator=( self_t && ) = delete; - - template< class... UTypes > - void add_line( UTypes &&... ) {} - - void clear() {} - - std::ostream & write_last_line_to_stream( std::ostream & stream ) const { - return stream; - } - - // print nothing - char last_line() const { - return '\0'; - } - - std::ostream & write_to_stream( std::ostream & stream ) const { - return stream; - } + template< class U, class... Us > + struct __is_csv_printable { + static constexpr bool value = std::is_arithmetic< U >::value; + }; - void write_to_file( const char * name ) const { - (void)name; - } + template< class U1, class U2, class... Us > + struct __is_csv_printable< U1, U2, Us... > { + static constexpr bool value = __is_csv_printable< U1 >::value + && __is_csv_printable< U2, Us... >::value; }; - template< typename TelControllerType, class T1, class... Ts > - class CSVWriter< TelControllerType, true, T1, Ts... > : public TelemetryBase< TelControllerType, true > { + /** + * Class to store numerical information in form of lines and emit it as a CSV, with + * heading, field separator and newlines. 
+ * + * The user should add an line at once via #add_line( UTypes && ) and can + * then output it to an \a std::ostream or a file, together with the + * heading specified at construction. The output is a fully compliant CSV file + * that can be read by common tools like spreadsheets and parsers (e.g. Pandas, + * https://pandas.pydata.org/). This class allows easily emitting telemetry + * information and importing them into advanced tools for thourough analysis. + * + * This implementation assumes telemetry is enabled, since a specialization for + * disabled telemetry follows. + * It internally allocates memory dynamically to store the lines. + * Only numerical information can be stored. + * + * @tparam TelControllerType type for the telemetry controller + * @tparam enabled whether telemetry is enabled + * @tparam T1 numerical type of the first field to store (at least one is required) + * @tparam Ts numerical types of the following fields to store + */ + template< + typename TelControllerType, + bool enabled, + class T1, + class... Ts + > class CSVWriter : + public TelemetryBase< TelControllerType, enabled > { public: - template< class U, class... Us > - struct is_csv_printable { - static constexpr bool value = std::is_arithmetic< U >::value; - }; - - template< class U1, class U2, class... Us > - struct is_csv_printable< U1, U2, Us... > { - static constexpr bool value = is_csv_printable< U1 >::value && is_csv_printable< U2, Us... >::value; - }; + static_assert( __is_csv_printable< T1, Ts... >::value, "not all types are printable" ); - static_assert( is_csv_printable< T1, Ts... >::value, "not all types are printable" ); - - using self_t = CSVWriter< TelControllerType, true, T1, Ts... >; + using self_t = CSVWriter< TelControllerType, enabled, T1, Ts... 
>; - using base_t = TelemetryBase< TelControllerType, true >; + using base_t = TelemetryBase< TelControllerType, enabled >; class CSVLastTuple { public: @@ -137,7 +106,23 @@ namespace grb { CSVWriter() = delete; - CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers, char _separator, size_t size ) : base_t( tt ), separator( _separator ) { + /** + * Full constructor for a CSVWriter. + * + * @param tt telemetry controller + * @param _headers CSV headers, whose number must match the number of T types to print + * @param _separator field separator for printing + * @param size hint size for initial memory allocation (dynamic allocation may occur anyway) + */ + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers, + char _separator, + size_t size + ) : + base_t( tt ), + separator( _separator ) + { if( _headers.size() != NUM_FIELDS ) { throw std::runtime_error( "wrong number of headers, it must match the unmber of line elements" ); } @@ -154,7 +139,14 @@ namespace grb { // std::memset( reinterpret_cast< void * >( lines.data() ), 0, lines.size() * sizeof( tuple_t ) ); } - CSVWriter( const TelControllerType & tt, std::initializer_list< const char * > _headers ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} + /** + * Construct a new CSVWriter object assuming a comma separator and an initial + * amount of lines to store. + */ + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers + ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} CSVWriter( const self_t & ) = delete; @@ -164,6 +156,12 @@ namespace grb { self_t & operator=( self_t && ) = delete; + /** + * Add a line to the CSV, i.e., store the numerical information internally. + * + * @tparam UTypes information types whose number must match the number of fields in the CSV; + * these types must also be implicitly convertible to the corresponding T1, Ts... types + */ template< class... 
UTypes > void add_line( UTypes &&... vs ) { if( this->is_active() ) { @@ -171,10 +169,22 @@ namespace grb { } } + /** + * Remove all lines from the CSV. + */ void clear() { lines.clear(); } + /** + * Emit the last line of the CSV into \p stream as actual text, i.e. with the fields separated. + * Does not print the newline. + * + * If there is no line stored, nothing is written. + * + * @param stream stream to write into + * @return std::ostream& \p stream itself + */ std::ostream & write_last_line_to_stream( std::ostream & stream ) const { if( lines.size() > 0 && this->is_active() ) { write_line( stream, lines.back() ); @@ -182,6 +192,12 @@ namespace grb { return stream; } + /** + * Returns an object that can be streamed into an std::cout stream via the \a << operator + * in order to print the last line stored. + * + * If there is no line stored, an std::runtime_error is thrown. + */ CSVLastTuple last_line() const { if( lines.size() == 0 ) { throw std::runtime_error( "no measures" ); @@ -189,6 +205,9 @@ namespace grb { return CSVLastTuple( *this ); } + /** + * Write the entire CSV into \p stream, with heading (heading, separated fields with newline). + */ std::ostream & write_to_stream( std::ostream & stream ) const { if( ! this->is_active() ) { return stream; @@ -202,6 +221,10 @@ namespace grb { return stream; } + /** + * Creates a new file named \p name (or overwrites an existing one) and stores the entire CSV + * into it. + */ void write_to_file( const char * name ) const { if( ! this->is_active() ) { return; @@ -252,6 +275,75 @@ namespace grb { } }; + /** + * Template specialization that assumes disabled telemetry: no state is kept, + * operations produce no result when invoked (no output into streams, no file creation). + * + * @tparam TelControllerType + * @tparam T1 + * @tparam Ts + */ + template< + typename TelControllerType, + class T1, + class... Ts + > class CSVWriter< TelControllerType, false, T1, Ts...
> : + public TelemetryBase< TelControllerType, false > { + public: + static_assert( __is_csv_printable< T1, Ts... >::value, "not all types are printable" ); + + using self_t = CSVWriter< TelControllerType, false, T1, Ts... >; + + using base_t = TelemetryBase< TelControllerType, false >; + + CSVWriter() = delete; + + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * >, + char, + size_t + ) : base_t( tt ) {} + + CSVWriter( + const TelControllerType & tt, + std::initializer_list< const char * > _headers + ) : CSVWriter( tt, _headers, STD_CSV_SEP, 10 ) {} + + CSVWriter( const self_t & ) = delete; + + CSVWriter( self_t && ) = delete; + + self_t & operator=( const self_t & ) = delete; + + self_t & operator=( self_t && ) = delete; + + template< class... UTypes > void add_line( UTypes &&... ) { + static_assert( sizeof...( UTypes ) == sizeof...( Ts ) + 1 ); + } + + void clear() {} + + std::ostream & write_last_line_to_stream( std::ostream & stream ) const { + return stream; + } + + char last_line() const { + return '\0'; + } + + std::ostream & write_to_stream( std::ostream & stream ) const { + return stream; + } + + void write_to_file( const char * name ) const { + (void)name; + } + }; + + /** + * Implementation of CSVWriter for enabled telemetry, with implemented operations. + */ template< class T1, class... Ts > using StaticCSVWriter = CSVWriter< TelemetryControllerAlwaysOn, true, T1, Ts... >; diff --git a/include/graphblas/utils/telemetry/OutputStream.hpp b/include/graphblas/utils/telemetry/OutputStream.hpp index 8ec0606d7..3d7c9fb1b 100644 --- a/include/graphblas/utils/telemetry/OutputStream.hpp +++ b/include/graphblas/utils/telemetry/OutputStream.hpp @@ -15,9 +15,11 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 14th February, 2023 +/** + * @file OutputStream.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the OutputStream class. 
*/ #ifndef _H_GRB_UTILS_TELEMETRY_OUTPUT_STREAM @@ -34,7 +36,12 @@ namespace grb { namespace utils { namespace telemetry { + /** + * SFINAE-based class to check whether the type \p T can be input to an std::ostream + * via the \a << operator. + */ template< typename T > struct is_ostream_input { + private: template< typename U > static constexpr bool is_input( typename std::enable_if< std::is_same< @@ -49,13 +56,22 @@ namespace grb { return false; } + public: static constexpr bool value = is_input< T >( nullptr ); }; - class OutputStreamLazy { - constexpr char operator()() const { return '\0'; } - }; - + /** + * Telemetry-controllable output stream with basic interface, based on the \a << operator. + * + * It accepts as input any type \a std::ostream accepts. In addition, it also accepts + * the internal #OutputStreamLazy type, which marks callable objects and allows + * lazy evaluation of their result if the telemetry is active; if not, the object is + * not called, avoiding runtime costs. This functionality allows paying time and memory + * costs of computation only if really needed. + * + * @tparam TelControllerType type of the telemetry controller + * @tparam enabled whether telemetry is enabled for this type + */ template< typename TelControllerType, bool enabled = TelControllerType::enabled @@ -63,64 +79,77 @@ namespace grb { public: using self_t = OutputStream< TelControllerType, enabled >; - OutputStream() = default; - - OutputStream( const TelControllerType & _tt, std::ostream & _out ) : - TelemetryBase< TelControllerType, enabled >( _tt ) - { - ( void ) _out; + using base_t = TelemetryBase< TelControllerType, enabled >; + + /** + * Marker object to indicate that the stored callable object is to be called + * in a lazy way, i.e., only if output is active.
+ * + * @tparam RetType return type of the callable object, to be printed + */ + template< typename RetType > class OutputStreamLazy { + + const std::function< RetType() > f; + + public: + static_assert( is_ostream_input< RetType >::value ); + + template< class F > OutputStreamLazy( F&& _f ) : f( std::forward< F >( _f ) ) {} + + RetType operator()() const { return f(); } + }; + + /** + * Convenience function to create an #OutputStreamLazy object from + * a callable one, inferring all template parameters automatically. + * + * @tparam CallableType type of the given callable object + * @tparam RetType return type of the callable object, to be printed + * @param f callable object + * @return OutputStreamLazy< RetType > object marking lazy evaluation for printing + */ + template< + typename CallableType, + typename RetType = decltype( std::declval< CallableType >()() ) + > static OutputStreamLazy< RetType > makeLazy( CallableType&& f ) { + static_assert( is_ostream_input< RetType >::value ); + return OutputStreamLazy< RetType >( std::forward< CallableType >( f ) ); } - OutputStream( const self_t & _out ) = default; - - OutputStream & operator=( const self_t & _out ) = delete; - - template< typename T > inline typename std::enable_if< - is_ostream_input< T >::value, - self_t & >::type operator<<( T&& v ) { - ( void ) v; - return *this; - } - - inline self_t & operator<<( std::ostream& (*func)( std::ostream& ) ) { - ( void ) func; - return *this; - } - - template< class F > inline typename std::enable_if< - is_ostream_input< decltype( std::declval< F >()() ) >::value - && std::is_base_of< OutputStreamLazy, F >::value, - self_t & >::type operator<<( F&& fun ) { - ( void ) fun; - return *this; - } - }; - - template< typename TelControllerType > class OutputStream< TelControllerType, true > : - public TelemetryBase< TelControllerType, true > { - public: - using self_t = OutputStream< TelControllerType, true >; - - using base_t = TelemetryBase< TelControllerType, true >; - -
OutputStream( const TelControllerType & _tt, std::ostream & _out ) : - TelemetryBase< TelControllerType, true >( _tt ), + /** + * Construct a new Output Stream object from a telemetry controller \p _tt + * and an output stream \p _out (usually \a std::cout) + */ + OutputStream( + const TelControllerType & _tt, + std::ostream & _out + ) : + TelemetryBase< TelControllerType, enabled >( _tt ), out( _out ) {} + /** + * Copy constructor. + */ OutputStream( const self_t & _outs ) = default; OutputStream & operator=( const self_t & _out ) = delete; - template< typename T > inline typename std::enable_if< - is_ostream_input< T >::value, - self_t & >::type operator<<( T&& v ) { + /** + * Stream input operator, enabled for all types std::ostream supports. + */ + template< typename T > inline typename std::enable_if< is_ostream_input< T >::value, + self_t & >::type operator<<( T&& v ) { if ( this->is_active() ) { out << std::forward< T >( v ); } return *this; } + /** + * Specialization of the \a << operator for stream manipulators, to support + * \a std::endl and similar manipulators. + */ inline self_t & operator<<( std::ostream& (*func)( std::ostream& ) ) { if ( this->is_active() ) { out << func; @@ -128,10 +157,24 @@ namespace grb { return *this; } + /** + * Specialization of the \a << operator for lazy evaluation of callable objects. + * + * A callable object can be wrapped into an #OutputStreamLazy object in order + * to be called only if necessary, i.e., only if the stream \a this is active. + * In this case, the internal callable object is called, its result is materialized + * and sent into the stream. + * + * To conveniently instantiate an #OutputStreamLazy to pass to this operator, + * see #makeLazy(CallableType&&).
+ * + * @tparam F type of the callable object + * @param fun callable object + * @return self_t & the stream itself + */ template< class F > inline typename std::enable_if< - is_ostream_input< decltype( std::declval< F >()() ) >::value - && std::is_base_of< OutputStreamLazy, F >::value, - self_t & >::type operator<<( F&& fun ) { + is_ostream_input< decltype( std::declval< OutputStreamLazy< F > >()() ) >::value, + self_t & >::type operator<<( const OutputStreamLazy< F >& fun ) { if ( this->is_active() ) { out << fun(); } @@ -142,9 +185,69 @@ namespace grb { std::ostream & out; }; - using OutputStreamOff = OutputStream< TelemetryControllerAlwaysOff, false >; + /** + * Template specialization of OutputStream + * for deactivated telemetry: no information is stored, no output produced. + */ + template< + typename TelControllerType + > class OutputStream< TelControllerType, false > : + public TelemetryBase< TelControllerType, false > { + public: + using self_t = OutputStream< TelControllerType, false >; + + + template< typename RetType > struct OutputStreamLazy { + + static_assert( is_ostream_input< RetType >::value ); + + template< class F > OutputStreamLazy( F&& ) {} + constexpr char operator()() const { return '\0'; } + }; + + template< + typename CallableType, + typename RetType = decltype( std::declval< CallableType >()() ) + > static OutputStreamLazy< RetType > makeLazy( CallableType&& f ) { + static_assert( is_ostream_input< RetType >::value ); + return OutputStreamLazy< RetType >( std::forward< CallableType >( f ) ); + } + + OutputStream() = default; + + OutputStream( const TelControllerType & _tt, std::ostream & ) : + TelemetryBase< TelControllerType, false >( _tt ) {} + + OutputStream( const self_t & _out ) = default; + + OutputStream & operator=( const self_t & _out ) = delete; + + inline self_t & operator<<( std::ostream& (*)( std::ostream& ) ) { + return *this; + } + + /** + * All-capturing implementation for the input stream operator, printing nothing. 
+ * + * This operator is convenient especially for debugging cases. + * In case of "normal" stream types used with custom data types, the user + * must extend them manually to print the custom data type. If the user uses a + * deactivated stream (for example as a default template parameter to disable + * logging by default), she needs not extend it for custom types in order + * to make it compile, which is especially nonsensical when the output is deactivated. + */ + template< typename T > self_t & operator<<( T&& ) { + return *this; + } + }; + + /// Always active output stream, mainly for debugging purposes. using OutputStreamOn = OutputStream< TelemetryControllerAlwaysOn, true >; + + /// Always inactive output stream + using OutputStreamOff = OutputStream< TelemetryControllerAlwaysOff, false >; + } } } diff --git a/include/graphblas/utils/telemetry/Stopwatch.hpp b/include/graphblas/utils/telemetry/Stopwatch.hpp index 1faa2e186..a607a3cbd 100644 --- a/include/graphblas/utils/telemetry/Stopwatch.hpp +++ b/include/graphblas/utils/telemetry/Stopwatch.hpp @@ -15,9 +15,11 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 14th February, 2023 +/** + * @file Stopwatch.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the Stopwatch class. */ #ifndef _H_GRB_UTILS_TELEMETRY_STOPWATCH @@ -31,49 +33,62 @@ namespace grb { namespace utils { namespace telemetry { + /** + * Type to store time duration in nanoseconds, which is the default time granularity. + */ using duration_nano_t = size_t; + /** + * Duration as floating point type, for time granularities coarser than nanoseconds. + */ using duration_float_t = double; + /** + * Base class for Stopwatch, with common logic. + */ class StopwatchBase { public: + + /** + * Convert nanoseconds to microseconds, returned as floating point type duration_float_t. 
+ */ static inline duration_float_t nano2Micro( duration_nano_t nano ) { return static_cast< duration_float_t >( nano ) / 1000UL; } + /** + * Convert nanoseconds to milliseconds, returned as floating point type duration_float_t. + */ static inline duration_float_t nano2Milli( duration_nano_t nano ) { return static_cast< duration_float_t >( nano ) / 1000000UL; } + /** + * Convert nanoseconds to seconds, returned as floating point type duration_float_t. + */ static inline duration_float_t nano2Sec( duration_nano_t nano ) { return static_cast< duration_float_t >( nano ) / 1000000000UL; } }; - template< typename TelControllerType, bool enabled = TelControllerType::enabled > - class Stopwatch : public StopwatchBase, public TelemetryBase< TelControllerType, enabled > { - public: - Stopwatch( const TelControllerType & tt ) : StopwatchBase(), TelemetryBase< TelControllerType, enabled >( tt ) {} - - Stopwatch( const Stopwatch & ) = default; - - constexpr inline void start() {} - - constexpr inline duration_nano_t stop() { - return static_cast< duration_nano_t >( 0 ); - } - - constexpr inline duration_nano_t reset() { - return static_cast< duration_nano_t >( 0 ); - } - - constexpr inline duration_nano_t getElapsedNano() const { - return static_cast< duration_nano_t >( 0 ); - } - }; - - template< typename TelControllerType > - class Stopwatch< TelControllerType, true > : public StopwatchBase, public TelemetryBase< TelControllerType, true > { + /** + * Class with functionalities to measure elapsed time for telemetry purposes: start, stop, reset. + * + * The time granularity is nanoseconds. + * + * Copy semantics is not available. + * + * This implementation assumes telemetry is enabled and the active state is controlled via + * a telemetry controller of type \p TelControllerType. 
+ * + * @tparam TelControllerType underlying telemetry controller type + * @tparam enabled whether it is compile-time enabled + */ + template< + typename TelControllerType, + bool enabled = TelControllerType::enabled + > class Stopwatch : + public StopwatchBase, public TelemetryBase< TelControllerType, enabled > { typedef typename std::chrono::high_resolution_clock clock_t; @@ -81,21 +96,40 @@ namespace grb { typedef typename std::chrono::high_resolution_clock::time_point time_point_t; - duration_t elapsedTime; + duration_t elapsedTime; ///< measured elapsed time so far, i.e., accumulated time periods between successive calls to #start() and #stop() - time_point_t beginning; + time_point_t beginning; ///< time instant of last call to #start() public: - Stopwatch( const TelControllerType & tt ) : StopwatchBase(), TelemetryBase< TelControllerType, true >( tt ), elapsedTime( duration_t::zero() ) {} - - Stopwatch( const Stopwatch & s ) = default; - + /** + * Construct a new Stopwatch object from a telemetry controller. + * + * @param tt underlying telemetry controller, to be (de)activated at runtime + */ + Stopwatch( const TelControllerType & tt ) : + StopwatchBase(), + TelemetryBase< TelControllerType, true >( tt ), + elapsedTime( duration_t::zero() ) {} + + Stopwatch( const Stopwatch< TelControllerType, enabled > & ) = delete; + + /** + * Start measuring time. + * + * Subsequent calls to this method "reset" the measure of elapsed time: if the user calls #start() + * twice and then #stop(), the elapsed time accumulated internally after the call to #stop() is + * the time elapsed from the \b second call of #start() to the call to #stop(). + */ inline void start() { if( this->is_active() ) { beginning = clock_t::now(); } } + /** + * Stops time measurement, returning the elapsed time since the last #start() invocation. + * Elapsed time is internally accounted only if this method is invoked. 
+ */ inline duration_nano_t stop() { duration_nano_t count = 0; if( this->is_active() ) { @@ -107,21 +141,95 @@ namespace grb { return count; } + /** + * Returns the elapsed time, which is accounted \b only if #stop() is called. + * + * The value of the elapsed time is not erased, so that successive calls return + * the same value. + */ + inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( elapsedTime.count() ); + } + + /** + * To be called on a stopped watch, it returns the elapsed time and sets it to 0. + */ inline duration_nano_t reset() { - duration_t r = duration_t::zero(); + duration_nano_t r = getElapsedNano(); if( this->is_active() ) { - r = elapsedTime; elapsedTime = duration_t::zero(); } - return static_cast< duration_nano_t >( r.count() ); + return r; } - inline duration_nano_t getElapsedNano() const { - return static_cast< duration_nano_t >( elapsedTime.count() ); + /** + * Stops the watch, sets the elapsed time to 0, starts it again + * and returns the time elapsed between the previous #start() + * and the #stop() internally called. + */ + inline duration_nano_t restart() { + stop(); + duration_nano_t r = reset(); + start(); + return r; } }; - using StaticStopwatch = Stopwatch< TelemetryControllerAlwaysOn, true >; + /** + * Template specialization of Stopwatch for disabled telemetry: + * no state is stored, all functions are inactive. 
+ */ + template< + typename TelControllerType + > class Stopwatch< TelControllerType, false > : + public StopwatchBase, public TelemetryBase< TelControllerType, false > { + public: + Stopwatch( const TelControllerType & tt ) : + StopwatchBase(), + TelemetryBase< TelControllerType, false >( tt ) {} + + Stopwatch( const Stopwatch< TelControllerType, false > & ) = delete; + + constexpr inline void start() {} + + constexpr inline duration_nano_t stop() { + return static_cast< duration_nano_t >( 0 ); + } + + constexpr inline duration_nano_t getElapsedNano() const { + return static_cast< duration_nano_t >( 0 ); + } + + constexpr inline duration_nano_t reset() { + return static_cast< duration_nano_t >( 0 ); + + } + + constexpr inline duration_nano_t restart() { + return static_cast< duration_nano_t >( 0 ); + } + + }; + + /** + * Always active stopwatch, requiring no telemetry controller for construction. + * Mainly for debugging purposes. + */ + class ActiveStopwatch : public Stopwatch< TelemetryControllerAlwaysOn, true > { + public: + + using base_t = Stopwatch< TelemetryControllerAlwaysOn, true >; + + ActiveStopwatch(): + base_t( tt ), + tt( true ) {} + + ActiveStopwatch( const ActiveStopwatch & ) = delete; + + private: + TelemetryControllerAlwaysOn tt; + }; + } // namespace telemetry } // namespace utils } // namespace grb diff --git a/include/graphblas/utils/telemetry/Telemetry.hpp b/include/graphblas/utils/telemetry/Telemetry.hpp index 0bb35909b..3da512b82 100644 --- a/include/graphblas/utils/telemetry/Telemetry.hpp +++ b/include/graphblas/utils/telemetry/Telemetry.hpp @@ -15,9 +15,28 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 14th February, 2023 +/** + * @dir include/graphblas/utils/telemetry + * This folder contains all telemetry functionalities, i.e., those meant to measure + * and report code execution in detail. 
They are designed with three goals in mind: + * -# compile-time control: all functionalities can be activated or deactivated + * at compile-time; if deactivated, they incur no runtime and memory cost + * -# fine granularity: since telemetry is complex and very application-specific, + * they allow fine-grained measurement and reporting; hence, they are also meant + * to be conveniently integrated into an existing application at fine granularity + * -# no pre-processor cluttering: multiple specializations may exist for the same functionality, + * for example to avoid memory or runtime costs if telemetry is deactivated; all + * implementations \b must compile against the same code paths, to avoid verbose + * insertion of #ifdef or similar directives on user's behalf. + * + * See the documentation of TelemetryController.hpp for some basic examples. + */ + +/** + * @file Telemetry.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Convenience all-include header for all telemetry-related functionalities. */ #ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY diff --git a/include/graphblas/utils/telemetry/TelemetryBase.hpp b/include/graphblas/utils/telemetry/TelemetryBase.hpp index fcb9f5105..04773591a 100644 --- a/include/graphblas/utils/telemetry/TelemetryBase.hpp +++ b/include/graphblas/utils/telemetry/TelemetryBase.hpp @@ -15,9 +15,11 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 1st March, 2023 +/** + * @file TelemetryBase.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the TelemetryBase class. */ #ifndef _H_GRB_UTILS_TELEMETRY_TELEMETRY_BASE @@ -30,57 +32,74 @@ namespace grb { namespace telemetry { /** + * Base class provided as a convenience, exposing whether the telemetry is active. * + * Default construction is unavailable, because telemetry functionalities need an + * underlying telemetry controller to know whether they are enabled and active.
* - * @tparam TelControllerType - * @tparam enabled + * Instead, copy construction is available for inheriting classes to easily implement copy semantics + * if needed; the copy shares the same telemetry controller of the original object via a reference. + * + * This implementation corresponds to enabled telemetry and stores an actual + * telemetry controller at runtime to be notified about its active state. + * + * @tparam TelControllerType type of the underlying telemetry controller, + * usually derived from TelemetryControllerBase + * @tparam enabled whether the current type is enabled (usually equal to TelControllerType::enabled) */ template< typename TelControllerType, bool enabled = TelControllerType::enabled > class TelemetryBase { + + const TelControllerType & telemetry_Controller; + public: static_assert( is_telemetry_controller< TelControllerType >::value, "type TelControllerType does not implement Telemetry Controller interface" ); using self_t = TelemetryBase< TelControllerType, enabled >; - TelemetryBase() = default; - - TelemetryBase( const TelControllerType & tt ) { - ( void ) tt; - } + TelemetryBase( const TelControllerType & tt ): telemetry_Controller( tt ) {} - TelemetryBase( const self_t & ) = default; + TelemetryBase( const self_t & tb ) : telemetry_Controller( tb.telemetry_Controller ) {} self_t & operator=( const self_t & ) = delete; - constexpr bool is_active() const { return false; } + bool is_active() const { return telemetry_Controller.is_active(); } }; - - template< + /** + * Template specialization for disabled telemetry: no state, no activity.
+ * + * @tparam TelControllerType + */ + template < typename TelControllerType - > class TelemetryBase< TelControllerType, true > { - - const TelControllerType & telemetry_Controller; - + > class TelemetryBase< TelControllerType, false > { public: static_assert( is_telemetry_controller< TelControllerType >::value, "type TelControllerType does not implement Telemetry Controller interface" ); - using self_t = TelemetryBase< TelControllerType, true >; + using self_t = TelemetryBase< TelControllerType, false >; - TelemetryBase( const TelControllerType & tt ): telemetry_Controller( tt ) {} + TelemetryBase() = default; - TelemetryBase( const self_t & tb ) : telemetry_Controller( tb.telemetry_Controller ) {} + TelemetryBase( const TelControllerType & ) {} + + TelemetryBase( const self_t & ) = default; self_t & operator=( const self_t & ) = delete; - bool is_active() const { return telemetry_Controller.is_active(); } + constexpr bool is_active() const { return false; } }; - // always actibe base, especially for prototyping scenarios + /** + * Specialization of TelemetryBase for enabled and always active telemetry, + * mainly for debugging purposes: it is always active. + * + * For API compliance, it accepts an always-on telemetry controller, but does not store it.
+ */ template<> class TelemetryBase< TelemetryControllerAlwaysOn, true > { public: static_assert( is_telemetry_controller< TelemetryControllerAlwaysOn >::value, diff --git a/include/graphblas/utils/telemetry/TelemetryController.hpp b/include/graphblas/utils/telemetry/TelemetryController.hpp index 63a013eab..f32c9ca21 100644 --- a/include/graphblas/utils/telemetry/TelemetryController.hpp +++ b/include/graphblas/utils/telemetry/TelemetryController.hpp @@ -16,8 +16,8 @@ */ /** - * @author Alberto Scolari - * @date 1st March, 2023 + * @file TelemetryController.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) * * This file defines the basic functionalities for Telemetry Controllers, i.e., * objects that enable/disable telemetry at compile-time and runtime. @@ -102,104 +102,113 @@ namespace grb { * field, possibly "short-circuiting" when #enabled is \a false. This implementation does * exactly this, disabling telemetry at compile-time and ignoring any runtime information. * - * @tparam en whether telemetry is enabled (\p en = \a true has a dedicated template specialization) + * Copy semantics is not available, because a controller stores just one piece of information + * (whether it is active) and a copy would essentially behave as a new object. + * Therefore, users should rather create new controllers themselves or pass around references + * to the same controller, in order to centralize control via a single controller object. + * + * Also move semantics is not available, since an "empty" controller makes no sense. + * + * This implementation assumes \p en = \a true, because a specialization for + * \p en = \a false exists (hence #enabled is set as \a true at compile-time). 
+ * + * @tparam en whether telemetry is enabled (\p en = \a false has a + * dedicated template specialization) */ template< bool en > class TelemetryControllerBase { public: using self_t = TelemetryControllerBase< en >; /** - * Construct a new Telemetry Controller Base object with runtime information. - * - * HEre, runtime information is ignored, as this implementation disables any telemetry. + * Construct a new Telemetry Controller Base object, specifying the \a active state. * - * @param _enabled whether telemetry is runtime-enabled (ignored here) + * @param _active whether the controller is \a active or not */ - TelemetryControllerBase( bool _enabled ) { - (void) _enabled; - } + TelemetryControllerBase( bool _active ) : active( _active ) {} TelemetryControllerBase() = delete; - TelemetryControllerBase( const self_t & ) = delete; + TelemetryControllerBase( const self_t & ) = default; TelemetryControllerBase& operator=( const self_t & ) = delete; /** - * Whether telemetry is runtime-active. - * - * @return true never here - * @return false always - */ - constexpr bool inline is_active() const { return false; } + * Tells whether the controller is \a active. + */ + bool is_active() const { return this->active; } /** - * Set the active status of the telemetry controller. + * Set the \a active status of the controller at runtime. * - * This \a disabled implementation ignores the input \p _active. + * @param _active whether to activate the controller */ void inline set_active( bool _active ) { - ( void ) _active; + this->active = _active; } /** - * Whether telemetry is compile-time active (never here). - */ - static constexpr bool enabled = false; - }; + * Whether telemetry is compile-time active (here always). + */ + static constexpr bool enabled = true; - /** - * Convenience definition fo an always-off telemetry controller.
- */ - using TelemetryControllerAlwaysOff = TelemetryControllerBase< false >; + protected: + bool active; + }; /** - * Template specialization for compile-time enabled telemetry, which - * can be controlled at runtime. + * Template specialization for compile-time disabled telemetry, + * whose functionalities are all disabled. * - * The controller is \b enabled by default, and its \a active status can be controlled - * at runtime via the constructor and the #set_active(bool) method. + * The controller is \b disabled by default, and modifications to + * its \a active status are ignored. */ - template<> class TelemetryControllerBase< true > { + template< > class TelemetryControllerBase< false > { public: - using self_t = TelemetryControllerBase< true >; + using self_t = TelemetryControllerBase< false >; /** - * Construct a new Telemetry oCntroller Base object, specifying the \a active state. + * Construct a new Telemetry Controller Base object with runtime information. * - * @param _active whether the controller is \a active or not + * Here, runtime information is ignored, as this implementation disables any telemetry. + * + * @param _enabled whether telemetry is runtime-enabled (ignored here) */ - TelemetryControllerBase( bool _active ) : active( _active ) {} + TelemetryControllerBase( bool _enabled ) { + (void) _enabled; + } TelemetryControllerBase() = delete; - TelemetryControllerBase( const self_t & ) = default; + TelemetryControllerBase( const self_t & ) = delete; TelemetryControllerBase& operator=( const self_t & ) = delete; /** - * Tells whether the controller is \a active. - */ - bool is_active() const { return this->active; } + * Whether telemetry is runtime-active. + * + * @return true never here + * @return false always + */ + constexpr bool inline is_active() const { return false; } /** - * Set the \a active status of the controller at runtime. + * Set the active status of the telemetry controller. 
* - * @param _active whether to activate the controller + * This \a disabled implementation ignores the input \p _active. */ - void inline set_active( bool _active ) { - this->active = _active; - } + void inline set_active( bool ) {} /** - * Whether telemetry is compile-time active (here always). - */ - static constexpr bool enabled = true; - - protected: - bool active; + * Whether telemetry is compile-time active (never here). + */ + static constexpr bool enabled = false; }; + /** + * Convenience definition for an always-off telemetry controller. + */ + using TelemetryControllerAlwaysOff = TelemetryControllerBase< false >; + /** * Always active controller, useful especially for prototyping scenarios. */ @@ -225,9 +234,7 @@ namespace grb { * * This \a disabled implementation ignores the input \p _active. */ - void inline set_active( bool _active ) { - ( void ) _active; - } + void inline set_active( bool ) {} /** * Whether telemetry is compile-time active (here always). @@ -294,15 +301,17 @@ namespace grb { * This declaration requires the declaration of an associated controller enabler type, which controls * whether the controller is enabled at compile-time; the controller is by default \b deactivated.
*/ -#define DEFINE_TELEMETRY_CONTROLLER( name ) \ - class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) {}; \ - using name = class __TELEMETRY_CONTROLLER_NAME( name ) : \ - public grb::utils::telemetry::TelemetryControllerBase< \ - grb::utils::telemetry::is_controller_enabled< __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() > { \ - public: \ - using base_t = grb::utils::telemetry::TelemetryControllerBase< \ - grb::utils::telemetry::is_controller_enabled< __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() >; \ - __TELEMETRY_CONTROLLER_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \ +#define DEFINE_TELEMETRY_CONTROLLER( name ) \ + class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) {}; \ + using name = class __TELEMETRY_CONTROLLER_NAME( name ) : \ + public grb::utils::telemetry::TelemetryControllerBase< \ + grb::utils::telemetry::is_controller_enabled< \ + __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() > { \ + public: \ + using base_t = grb::utils::telemetry::TelemetryControllerBase< \ + grb::utils::telemetry::is_controller_enabled< \ + __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() >; \ + __TELEMETRY_CONTROLLER_NAME( name )( bool _enabled ) : base_t( _enabled ) {} \ }; /** @@ -311,9 +320,9 @@ namespace grb { * Once enabled, it can be runtime activated. 
*/ #define ENABLE_TELEMETRY_CONTROLLER( name ) class __TELEMETRY_CONTROLLER_ENABLER_NAME( name ); \ - namespace grb { namespace utils { namespace telemetry { \ - template<> constexpr bool is_controller_enabled< \ - __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() { return true; } \ + namespace grb { namespace utils { namespace telemetry { \ + template<> constexpr bool is_controller_enabled< \ + __TELEMETRY_CONTROLLER_ENABLER_NAME( name ) >() { return true; } \ } } } #endif // _H_GRB_UTILS_TELEMETRY_TELEMETRY_CONTROLLER diff --git a/include/graphblas/utils/telemetry/Timeable.hpp b/include/graphblas/utils/telemetry/Timeable.hpp index 95d1bdfa2..2ffb97723 100644 --- a/include/graphblas/utils/telemetry/Timeable.hpp +++ b/include/graphblas/utils/telemetry/Timeable.hpp @@ -15,13 +15,15 @@ * limitations under the License. */ -/* - * @author Alberto Scolari - * @date 14th February, 2023 +/** + * @file Timeable.hpp + * @author Alberto Scolari (alberto.scolar@huawei.com) + * + * Definition for the Timeable class. */ -#ifndef _H_GRB_UTILS_TIMEABLE -#define _H_GRB_UTILS_TIMEABLE +#ifndef _H_GRB_UTILS_TELEMETRY_TIMEABLE +#define _H_GRB_UTILS_TELEMETRY_TIMEABLE #include "Stopwatch.hpp" @@ -29,6 +31,14 @@ namespace grb { namespace utils { namespace telemetry { + /** + * Facility for inheriting classes that want to time internal operations: + * this class provides protected methods to measure elapsed time and public methods to expose + * elapsed time and allow resetting the internal elapsed time. + * + * @tparam TelControllerType type of telemetry controller + * @tparam enabled whether telemetry is enabled + */ template< typename TelControllerType, bool enabled = TelControllerType::enabled @@ -44,23 +54,41 @@ namespace grb { Timeable& operator=( const self_t & ) = delete; + /** + * Get the elapsed time, in nanoseconds. + */ constexpr inline duration_nano_t getElapsedNano() const { return static_cast< duration_nano_t >( 0 ); } + /** + * Reset the internal value of elapsed time.
+ */ constexpr inline duration_nano_t reset() { return static_cast< duration_nano_t >( 0 ); } protected: + + /** + * Starts measuring the elapsed time. + */ inline void start() {} + /** + * Stops measuring elapsed time. + */ constexpr inline duration_nano_t stop() { return static_cast< duration_nano_t >( 0 ); } }; + /** + * Implementation of Timeable for enabled telemetry. + * + * @tparam TelControllerType type of telemetry controller. + */ template< typename TelControllerType > class Timeable< TelControllerType, true > { public: using self_t = Timeable< TelControllerType, true >; @@ -98,4 +126,4 @@ namespace grb { } } -#endif // _H_GRB_UTILS_TIMEABLE +#endif // _H_GRB_UTILS_TELEMETRY_TIMEABLE diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 07c38cc99..45c89cd29 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -24,28 +24,25 @@ * benchmark impementation in https://github.com/hpcg-benchmark/hpcg. */ +#include +#include #include #include #include -#include -#include -#include -#include -#include #include #include +#include #include +#include +#include #include - +#include +#include +#include +#include #include #include -#include -#include - -#include -#include - #include #include @@ -83,7 +80,7 @@ struct HPCGTypes { using NonzeroType = value_t; using InputType = value_t; using ResidualType = value_t; - using Ring = Semiring< grb::operators::add< NonzeroType >, grb::operators::mul< NonzeroType >, + using Ring = Semiring< grb::operators::add< NonzeroType >,grb::operators::mul< NonzeroType >, grb::identities::zero, grb::identities::one >; using Minus = operators::subtract< NonzeroType >; using Divide = operators::divide< NonzeroType >; @@ -139,8 +136,8 @@ using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_controller using hpcg_data_t = typename hpcg_runner_t::HPCGInputType; struct dotter : grb::utils::telemetry::OutputStreamLazy { - const grb::Vector< IOType > &v; - dotter( const grb::Vector< IOType > &_v ) : v( _v ) {} + 
const grb::Vector< IOType > & v; + dotter( const grb::Vector< IOType > & _v ) : v( _v ) {} ResidualType operator()() const { Ring ring; ResidualType r = 0; @@ -154,10 +151,10 @@ static inline DBGStream & operator<<( DBGStream & stream, const grb::Vector< IOT return stream << dotter( v ); } -static const IOType io_zero = Ring(). template getZero< IOType >(); -static const NonzeroType nz_zero = Ring(). template getZero< NonzeroType >(); -static const InputType input_zero = Ring(). template getZero< InputType >(); -static const ResidualType residual_zero = Ring(). template getZero< ResidualType >(); +static const IOType io_zero = Ring().template getZero< IOType >(); +static const NonzeroType nz_zero = Ring().template getZero< NonzeroType >(); +static const InputType input_zero = Ring().template getZero< InputType >(); +static const ResidualType residual_zero = Ring().template getZero< ResidualType >(); static constexpr size_t MAX_CSV_PATH_LENGTH = 255; @@ -202,14 +199,12 @@ struct output { }; #ifdef HPCG_PRINT_SYSTEM -static void print_system( - const std::vector< std::unique_ptr< mg_data_t > > &system_levels, - const std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels -) { +static void print_system( const std::vector< std::unique_ptr< mg_data_t > > & system_levels, + const std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels ) { assert( spmd<>::nprocs() == 1 ); // distributed printin of system not implemented print_matrix( system_levels[ 0 ]->A, 70, "A" ); for( size_t i = 0; i < coarsener_levels.size(); i++ ) { - print_matrix( coarsener_levels[i ] ->coarsening_matrix, 50, "COARSENING MATRIX" ); + print_matrix( coarsener_levels[ i ]->coarsening_matrix, 50, "COARSENING MATRIX" ); print_matrix( system_levels[ i + 1 ]->A, 50, "COARSER SYSTEM MATRIX" ); } } @@ -233,7 +228,7 @@ static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t ) { grb::utils::Timer timer; - hpcg_data_t *data = new hpcg_data_t( mg_sizes[ 0 ] 
); + hpcg_data_t * data = new hpcg_data_t( mg_sizes[ 0 ] ); cg_system_data = std::unique_ptr< hpcg_data_t >( data ); logger << "allocating data for the MultiGrid simulation..."; timer.reset(); @@ -247,25 +242,29 @@ static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t grb::RC rc = data->init_vectors( io_zero ); ASSERT_RC_SUCCESS( rc ); std::for_each( system_levels.begin(), system_levels.end(), - []( std::unique_ptr< mg_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); + []( std::unique_ptr< mg_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); std::for_each( coarsener_levels.begin(), coarsener_levels.end(), - []( std::unique_ptr< coarsening_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); + []( std::unique_ptr< coarsening_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); std::for_each( smoother_levels.begin(), smoother_levels.end(), - []( std::unique_ptr< smoothing_data_t > &s) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); + []( std::unique_ptr< smoothing_data_t > & s ) { + ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); + } ); time = timer.time(); logger << " time (ms) " << time << std::endl; } - /** * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. * It allocates the data structures and populates them according to the algorithms chosen for HPCG. 
*/ -static void build_3d_system( - std::vector< std::unique_ptr< mg_data_t > > &system_levels, - std::vector< std::unique_ptr< coarsening_data_t > > &coarsener_levels, - std::vector< std::unique_ptr< smoothing_data_t > > &smoother_levels, - std::unique_ptr< hpcg_data_t > &cg_system_data, +static void build_3d_system( std::vector< std::unique_ptr< mg_data_t > > & system_levels, + std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, + std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, + std::unique_ptr< hpcg_data_t > & cg_system_data, const simulation_input & in, const mg_controller_t & tt, DistStream & logger @@ -274,10 +273,8 @@ static void build_3d_system( using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; grb::utils::Timer timer; - HPCGSystemParams< DIMS, NonzeroType > params = { - { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, - PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 - }; + HPCGSystemParams< DIMS, NonzeroType > params = { { in.nx, in.ny, in.nz }, HALO_RADIUS, + SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; std::vector< builder_t > mg_generators; logger << "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..."; @@ -286,39 +283,43 @@ static void build_3d_system( hpcg_build_multigrid_generators( params, mg_generators ); double time = timer.time(); logger << " time (ms) " << time << std::endl; - logger << "built HPCG generators for " << mg_generators.size() - << " levels" << std::endl; + logger << "built HPCG generators for " << mg_generators.size() << " levels" << std::endl; // extract the size for each level std::vector< size_t > mg_sizes; - std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( mg_sizes ), - [] ( const builder_t &b ) { return b.system_size(); } ); + std::transform( mg_generators.cbegin(), mg_generators.cend(), std::back_inserter( 
mg_sizes ), + []( const builder_t & b ) { + return b.system_size(); + } ); // given the sizes, allocate the data structures for all the inputs of the algorithms - allocate_system_structures( system_levels, coarsener_levels, smoother_levels, cg_system_data, mg_sizes, tt, logger ); + allocate_system_structures( system_levels, coarsener_levels, smoother_levels, + cg_system_data, mg_sizes, tt, logger ); assert( mg_generators.size() == system_levels.size() ); assert( mg_generators.size() == smoother_levels.size() ); assert( mg_generators.size() - 1 == coarsener_levels.size() ); // coarsener acts between two levels // for each grid level, populate the data structures according to the specific algorithm // and track the time for diagnostics purposes - for( size_t i = 0; i < mg_generators.size(); i++) { + for( size_t i = 0; i < mg_generators.size(); i++ ) { logger << "SYSTEM LEVEL " << i << std::endl; - auto& sizes = mg_generators[ i ].get_generator().get_sizes(); + auto & sizes = mg_generators[ i ].get_generator().get_sizes(); logger << " sizes: "; for( size_t s = 0; s < DIMS - 1; s++ ) { - logger <A, logger ); + grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], + system_levels.at( i )->A, logger ); time = timer.time(); ASSERT_RC_SUCCESS( rc ); logger << " time (ms) " << time << std::endl; logger << " populating smoothing data: "; timer.reset(); - rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ], logger ); + rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ], + logger ); time = timer.time(); ASSERT_RC_SUCCESS( rc ); logger << " time (ms) " << time << std::endl; @@ -326,10 +327,12 @@ static void build_3d_system( if( i > 0 ) { logger << " populating coarsening data: "; timer.reset(); - if( !in.use_average_coarsener ) { - rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + if( ! 
in.use_average_coarsener ) { + rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], + *coarsener_levels[ i - 1 ] ); } else { - rc = hpcg_populate_coarsener_avg( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); + rc = hpcg_populate_coarsener_avg( mg_generators[ i - 1 ], mg_generators[ i ], + *coarsener_levels[ i - 1 ] ); } time = timer.time(); ASSERT_RC_SUCCESS( rc ); @@ -338,7 +341,6 @@ static void build_3d_system( } } - /** * Main test, building an HPCG problem and running the simulation closely following the * parameters in the reference HPCG test. @@ -400,9 +402,9 @@ void grbProgram( const simulation_input & in, struct output & out ) { } #endif - Matrix< NonzeroType > &A = mg_runner.system_levels[ 0 ]->A; - Vector< IOType > &x = hpcg_state->x; - Vector< NonzeroType > &b = hpcg_state->b; + Matrix< NonzeroType > & A = mg_runner.system_levels[ 0 ]->A; + Vector< IOType > & x = hpcg_state->x; + Vector< NonzeroType > & b = hpcg_state->b; RC rc = SUCCESS; // set vectors as from standard HPCG benchmark @@ -420,7 +422,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.preamble = timer.time(); - mg_data_t &grid_base = *mg_runner.system_levels[ 0 ]; + mg_data_t & grid_base = *mg_runner.system_levels[ 0 ]; // do a cold run to warm the system up logger << TEXT_HIGHLIGHT << "beginning cold run..." 
<< std::endl; @@ -471,7 +473,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.useful /= static_cast< double >( in.inner_test_repetitions ); logger << TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions - << ", " << out.times.useful << std::endl; + << ", " << out.times.useful << std::endl; std::cout.imbue( old_locale ); // start postamble @@ -490,10 +492,10 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.times.postamble = timer.time(); // write measurements into CSV files - if ( in.hpcg_log ) { + if( in.hpcg_log ) { hpcg_csv.write_to_file( in.hpcg_csv.data() ); } - if ( in.mg_log ) { + if( in.mg_log ) { mg_csv.write_to_file( in.mg_csv.data() ); } } @@ -514,17 +516,19 @@ int main( int argc, char ** argv ) { thcout << "System size x: " << sim_in.nx << std::endl; thcout << "System size y: " << sim_in.ny << std::endl; thcout << "System size z: " << sim_in.nz << std::endl; - thcout << "Coarsener: " << (sim_in.use_average_coarsener ? "average" : "single point sampler" ) << std::endl; + thcout << "Coarsener: " << ( sim_in.use_average_coarsener ? 
"average" : + "single point sampler" ) << std::endl; thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; thcout << "Test repetitions: " << sim_in.inner_test_repetitions << std::endl; thcout << "Max iterations: " << sim_in.max_iterations << std::endl; - thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; - thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning << std::noboolalpha << std::endl; + thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run + << std::noboolalpha << std::endl; + thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning + << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; thcout << "Maximum norm for residual: " << max_diff_norm << std::endl; - // the output struct struct output out; @@ -543,10 +547,11 @@ int main( int argc, char ** argv ) { // compute number of inner repetitions to achieve at least 1s duration sim_in.inner_test_repetitions = static_cast< size_t >( 1000.0 / out.times.useful ) + 1; thcout << "Evaluation run" << std::endl - << " computed residual: " << out.cg_out.norm_residual << std::endl - << " iterations: " << out.cg_out.iterations << std::endl - << " time taken (ms): " << out.times.useful << std::endl - << " deduced inner repetitions for 1s duration: " << sim_in.inner_test_repetitions << std::endl; + << " computed residual: " << out.cg_out.norm_residual << std::endl + << " iterations: " << out.cg_out.iterations << std::endl + << " time taken (ms): " << out.times.useful << std::endl + << " deduced inner repetitions for 1s duration: " << sim_in.inner_test_repetitions + << std::endl; } // launch full benchmark @@ -556,18 +561,19 @@ int main( int argc, char ** argv ) { ASSERT_RC_SUCCESS( rc ); ASSERT_RC_SUCCESS( out.error_code ); thcout << "completed 
successfully!" << std::endl - << " final residual: " << out.cg_out.norm_residual << std::endl - << " solver iterations: " << out.cg_out.iterations << std::endl - << " total time (ms): " << out.times.useful << std::endl; + << " final residual: " << out.cg_out.norm_residual << std::endl + << " solver iterations: " << out.cg_out.iterations << std::endl + << " total time (ms): " << out.times.useful << std::endl; // check result vector, stored inside a pinned vector ASSERT_TRUE( out.pinnedVector ); - const PinnedVector< double > &solution = *out.pinnedVector; + const PinnedVector< double > & solution = *out.pinnedVector; ASSERT_EQ( solution.size(), sim_in.nx * sim_in.ny * sim_in.nz ); // check norm of solution w.r.t. expected solution (i.e. vector of all 1) double diff_norm = sqrt( out.square_norm_diff ); - thcout << "Norm of difference vector: | - | = " << diff_norm << std::endl; + thcout << "Norm of difference vector: | - | = " + << diff_norm << std::endl; ASSERT_LT( diff_norm, max_diff_norm ); thcout << "Test OK" << std::endl; @@ -576,15 +582,10 @@ int main( int argc, char ** argv ) { static const char * const empty = ""; -static void parse_arguments( - simulation_input & sim_in, - size_t & outer_iterations, - double & max_diff_norm, - int argc, - char ** argv -) { +static void parse_arguments( simulation_input & sim_in, size_t & outer_iterations, + double & max_diff_norm, int argc, char ** argv ) { argument_parser parser; - const char * hpcg_csv, * mg_csv; + const char *hpcg_csv, *mg_csv; parser.add_optional_argument( "--nx", sim_in.nx, PHYS_SYSTEM_SIZE_DEF, "physical system size along x" ) .add_optional_argument( "--ny", sim_in.ny, PHYS_SYSTEM_SIZE_DEF, "physical system size along y" ) @@ -598,18 +599,17 @@ static void parse_arguments( "test repetitions with complete initialization" ) .add_optional_argument( "--max-cg-iterations", sim_in.max_iterations, MAX_ITERATIONS_DEF, "maximum number of CG iterations" ) - .add_optional_argument( "--max-difference-norm", 
max_diff_norm, MAX_NORM, - "maximum acceptable norm | - | (does NOT limit " - "the execution of the algorithm)" ) + .add_optional_argument( "--max-difference-norm", max_diff_norm, MAX_NORM, "maximum acceptable" + " norm | - | (does NOT limit the execution of the algorithm)" ) .add_optional_argument( "--smoother-steps", sim_in.smoother_steps, SMOOTHER_STEPS_DEF, "number of pre/post-smoother steps; 0 disables smoothing" ) .add_option( "--evaluation-run", sim_in.evaluation_run, false, "launch single run directly, without benchmarker (ignore repetitions)" ) .add_option( "--no-preconditioning", sim_in.no_preconditioning, false, "do not apply pre-conditioning via multi-grid V cycle" ) - .add_optional_argument( "--hpcg-csv", hpcg_csv , empty, + .add_optional_argument( "--hpcg-csv", hpcg_csv, empty, "file for HPCG run measurements (overwrites any previous)" ) - .add_optional_argument( "--mg-csv", mg_csv , empty, + .add_optional_argument( "--mg-csv", mg_csv, empty, "file for Multigrid run measurements (overwrites any previous)" ) .add_option( "--use-average-coarsener", sim_in.use_average_coarsener, false, "coarsen by averaging instead of by sampling a single point (slower, but more accurate)" ); @@ -634,18 +634,18 @@ static void parse_arguments( const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; for( size_t s : { sim_in.nx, sim_in.ny, sim_in.nz } ) { std::lldiv_t div_res = std::div( static_cast< long long >( s ), static_cast< long long >( max_system_divider ) ); - if ( div_res.rem != 0) { - std::cerr << "ERROR: system size " << s << " cannot be coarsened " - << sim_in.max_coarsening_levels << " times because it is not exactly divisible" << std::endl; + if( div_res.rem != 0 ) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels + << " times because it is not exactly divisible" << std::endl; std::exit( -1 ); } - if ( div_res.quot < static_cast< long long >( PHYS_SYSTEM_SIZE_MIN ) ) { - std::cerr << "ERROR: 
system size " << s << " cannot be coarsened " - << sim_in.max_coarsening_levels << " times because it is too small" << std::endl; + if( div_res.quot < static_cast< long long >( PHYS_SYSTEM_SIZE_MIN ) ) { + std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels + << " times because it is too small" << std::endl; std::exit( -1 ); } - if ( div_res.quot % 2 != 0 ) { - std::cerr << "ERROR: the coarsest size " << div_res.rem << " is not a multiple of 2" << std::endl; + if( div_res.quot % 2 != 0 ) { + std::cerr << "ERROR: the coarsest size " << div_res.rem << " is not even" << std::endl; std::exit( -1 ); } } @@ -653,7 +653,7 @@ static void parse_arguments( // check output CSVs size_t len = std::strlen( hpcg_csv ); if( ( sim_in.hpcg_log = len > 0 ) ) { - if ( len > MAX_CSV_PATH_LENGTH ) { + if( len > MAX_CSV_PATH_LENGTH ) { std::cerr << "HPCG CSV file name is too long!" << std::endl; std::exit( -1 ); } @@ -661,7 +661,7 @@ static void parse_arguments( } len = std::strlen( mg_csv ); if( ( sim_in.mg_log = len > 0 ) ) { - if ( len > MAX_CSV_PATH_LENGTH ) { + if( len > MAX_CSV_PATH_LENGTH ) { std::cerr << "HPCG CSV file name is too long!" 
<< std::endl; std::exit( -1 ); } From a727e993819bcd622d55f77e155f95aa5c86ae5f Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 24 Feb 2023 17:25:51 +0100 Subject: [PATCH 23/28] adding comments to refactored classes --- .../algorithms/hpcg/greedy_coloring.hpp | 1 + .../algorithms/hpcg/system_building_utils.hpp | 9 ++--- .../algorithms/multigrid/multigrid_cg.hpp | 30 ++++++++------ .../multigrid/multigrid_v_cycle.hpp | 27 ++++++++----- .../multigrid/red_black_gauss_seidel.hpp | 12 +++--- .../multigrid/single_matrix_coarsener.hpp | 9 +++-- tests/smoke/hpcg.cpp | 40 ++++++++++--------- 7 files changed, 70 insertions(+), 58 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp index bb4759d6f..24bb1e1e4 100644 --- a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp +++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp @@ -61,6 +61,7 @@ namespace grb { * @tparam DIMS dimensions of the system * @tparam CoordType type of the coordinates * @tparam lower_color_first start greedy assignment of colors from lowest first + * * @param[in] system generator for an \p DIMS - dimesional system with halo * @param[out] row_colors if \p reorder_rows_per_color is false, stores the color of each row; * if \p reorder_rows_per_color is true, stores the new position of each row, so that rows diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 9f3fdf583..ddf9e45a5 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -167,6 +167,7 @@ namespace grb { * @tparam DIMS number of dimensions * @tparam CoordType type storing the coordinates and the sizes * @tparam NonzeroType type of the nonzero + * * @param finer_system_generator object generating the finer system * @param coarser_system_generator object generating the finer 
system * @param coarsener structure with the matrix to populate @@ -256,8 +257,7 @@ namespace grb { * @param[out] per_color_rows for each position \a i it stores an std::vector with all rows * of color \a i inside \p row_colors */ - template< typename CoordType > - void hpcg_split_rows_by_color( + template< typename CoordType > void hpcg_split_rows_by_color( const std::vector< CoordType > & row_colors, size_t num_colors, std::vector< std::vector< CoordType > > & per_color_rows ) { @@ -276,10 +276,7 @@ namespace grb { * * @tparam CoordType type of the internal coordinate */ - template< typename CoordType > - struct true_iter { - - // static const bool __TRUE; + template< typename CoordType > struct true_iter { using self_t = true_iter< CoordType >; using iterator_category = std::random_access_iterator_tag; diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index f465ba8da..cd1761589 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -100,14 +100,13 @@ namespace grb { * The \p MultiGridrunnerType must implement a functional interface whose input (from CG) * is the structure with the system information for one level of the grid. * - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * @tparam InputType type of values of the right-hand side vector b - * @tparam ResidualType type of the residual norm + * @tparam MGCGTypes types container for algebraic information (IOType, NonzeroType, + * InputType, ResidualType, Ring, Minus) * @tparam MultiGridrunnerType type for the multi-grid runner object - * @tparam Ring algebraic ring type - * @tparam Minus minus operator * @tparam descr descriptors with statically-known data for computation and containers + * @tparam DbgOutputStreamType type for the debugging stream, i.e. 
the stream to trace simulation + * results alongside execution; the default type #grb::utils::telemetry::OutputStreamOff disables + * all output at compile time */ template< typename MGCGTypes, @@ -117,12 +116,14 @@ namespace grb { typename DbgOutputStreamType = grb::utils::telemetry::OutputStreamOff > struct MultiGridCGRunner : public grb::utils::telemetry::Timeable< TelControllerType > { + // algebraic types using IOType = typename MGCGTypes::IOType; using NonzeroType = typename MGCGTypes::NonzeroType; using InputType = typename MGCGTypes::InputType; using ResidualType = typename MGCGTypes::ResidualType; using Ring = typename MGCGTypes::Ring; using Minus = typename MGCGTypes::Minus; + // input types for simulation (CG and MG) using HPCGInputType = MultiGridCGData< IOType, NonzeroType, InputType >; using MGRunnerType = MultiGridRunnerType; @@ -141,14 +142,13 @@ namespace grb { ResidualType tolerance = ring.template getZero< ResidualType >(); ///< ratio between initial residual and current residual that halts the solver ///< if reached, for the solution is to be considered "good enough" - MultiGridRunnerType & mg_runner; - DbgOutputStreamType dbg_logger; + MultiGridRunnerType & mg_runner; ///< runner object for MG + DbgOutputStreamType dbg_logger; ///< logger to trace execution /** - * Construct a new MultiGridCGRunner object by moving the required MG runner. + * Construct a new MultiGridCGRunner object with the required MG runner. * - * Moving the state of the MG is safer in that it avoids use-after-free issues, - * as the state of the MG runner is managed automatically with this object. + * The debug logger is unavailable. */ MultiGridCGRunner( const TelControllerType & tt, @@ -161,6 +161,10 @@ namespace grb { static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); } + /** + * Construct a new MultiGridCGRunner object with the required MG runner and + * the user-given debug logger. 
+ */ MultiGridCGRunner( const TelControllerType & tt, MultiGridRunnerType & _mg_runner, @@ -200,7 +204,6 @@ namespace grb { * Failures of GraphBLAS operations are handled by immediately stopping the execution and by returning * the failure code. * - * * @param cg_data data for the CG solver only * @param grid_base base (i.e., finer) level of the multi-grid, with the information of the physical system * @param out_info solver output information @@ -327,7 +330,8 @@ namespace grb { ++iter; out_info.iterations = iter; out_info.norm_residual = norm_residual; - } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance && ret == SUCCESS ); + } while( iter < max_iterations && norm_residual / norm_residual_initial > tolerance + && ret == SUCCESS ); return ret; } diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 31b623024..1a036c1cc 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -48,17 +48,17 @@ namespace grb { * It is built by transferring into it the state of both the smoother and the coarsener, * in order to avoid use-after-free issues. * + * @tparam MGTypes types container for algebraic information (IOType, NonzeroType, Ring, Minus) * @tparam MGSmootherType type of the smoother runner, with prescribed methods for the various * smoothing steps * @tparam CoarsenerType type of the coarsener runner, with prescribed methods for coarsening - * @tparam IOType type of result and intermediate vectors used during computation - * @tparam NonzeroType type of matrix values - * and prolongation - * @tparam Ring the ring of algebraic operators and zero values - * @tparam Minus the minus operator for subtractions * @tparam descr descriptors with statically-known data for computation and containers + * @tparam DbgOutputStreamType type for the debugging stream, i.e. 
the stream to trace simulation + * results alongside execution; the default type #grb::utils::telemetry::OutputStreamOff disables + * all output at compile time */ - template< typename MGTypes, + template< + typename MGTypes, typename MGSmootherType, typename CoarsenerType, typename TelControllerType, @@ -88,7 +88,7 @@ namespace grb { MGSmootherType & smoother_runner; ///< object to run the smoother CoarsenerType & coarsener_runner; ///< object to run the coarsener - DbgOutputStreamType dbg_logger; + DbgOutputStreamType dbg_logger; ///< logger to trace execution std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) Ring ring; ///< algebraic ring @@ -113,6 +113,8 @@ namespace grb { /** * Construct a new MultiGridRunner object by moving in the state of the pre-built * smoother and coarsener. + * + * The debug logger is deactivated. */ MultiGridRunner( MGSmootherType & _smoother_runner, @@ -124,6 +126,10 @@ namespace grb { static_assert( std::is_default_constructible< DbgOutputStreamType >::value ); } + /** + * Construct a new MultiGridRunner object by moving in the state of the pre-built + * smoother and coarsener and with a user-given debug logger. + */ MultiGridRunner( MGSmootherType & _smoother_runner, CoarsenerType & _coarsener_runner, @@ -141,6 +147,9 @@ namespace grb { __unique_ptr_extractor( system_levels.end() ) ); } + /** + * Operator to invoke a multi-grid run among given levels. 
+ */ inline grb::RC operator()( __unique_ptr_extractor begin, const __unique_ptr_extractor end @@ -171,10 +180,6 @@ namespace grb { * * @param mgiter_begin iterator pointing to the current level of the multi-grid * @param mgiter_end end iterator, indicating the end of the recursion - * @param smoother callable object to invoke the smoothing steps - * @param coarsener callable object to coarsen and prolong (between current and coarser grid levels) - * @param ring the ring to perform the operations on - * @param minus the \f$ - \f$ operator for vector subtractions * @return grb::RC if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index aa7157de7..d86b2382b 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -75,6 +75,7 @@ namespace grb { Descriptor descr = descriptors::no_operation > struct RedBlackGSSmootherRunner { + // algebraic types using IOType = typename SmootherTypes::IOType; using NonzeroType = typename SmootherTypes::NonzeroType; using Ring = typename SmootherTypes::Ring; @@ -132,12 +133,8 @@ namespace grb { /** * Runs a single step of Red-Black Gauss-Seidel for a specific color. 
* - * @param[in] A the system matrix - * @param[in] A_diagonal a vector storing the diagonal elements of \p A - * @param[in] r the residual - * @param[in,out] z the initial solution to start from, and where the smoothed solution is stored to - * @param[out] smoother_temp a vector for temporary values - * @param[in] color_mask the mask of colors to filter the rows to smooth + * @param[in,out] data structure with external containers, corresponsign to an MG level: vector to smooth, system matrix, residual + * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ @@ -189,7 +186,8 @@ namespace grb { * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic * to pass correct coloring information. Otherwise, \b no guarantees hold on the result. * - * @param[in,out] data structure with the data of a single grid level + * @param[in,out] data structure with external containers, corresponsign to an MG level: vector to smooth, system matrix, residual + * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index 4a19f9deb..3b2379802 100644 --- a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -35,7 +35,7 @@ namespace grb { namespace algorithms { /** - * Structure storing the data for the coarsener + * Structure storing the data for the coarsener. 
*/ template< typename IOType, @@ -76,6 +76,7 @@ namespace grb { Descriptor descr = descriptors::no_operation > struct SingleMatrixCoarsener { + // algebraic types using IOType = typename CoarsenerTypes::IOType; using NonzeroType = typename CoarsenerTypes::NonzeroType; using Ring = typename CoarsenerTypes::Ring; @@ -132,13 +133,14 @@ namespace grb { * The coarsening information are stored inside \p CoarseningData. * * @param[in] r_fine fine residual vector + * @param[out] r_coarse coarse residual vector, the output * @param[in,out] coarsening_data \ref MultiGridData data structure storing the information for coarsening * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise */ grb::RC compute_coarsening( - const grb::Vector< IOType > & r_fine, // fine residual - grb::Vector< IOType > & r_coarse, // coarse residual + const grb::Vector< IOType > & r_fine, + grb::Vector< IOType > & r_coarse, CoarseningData< IOType, NonzeroType > & coarsening_data ) { RC ret = SUCCESS; @@ -160,6 +162,7 @@ namespace grb { * * For prolongation, this function uses the matrix \p coarsening_data.coarsening_matrix by transposing it. 
* + * @param[in] z_coarse input coarse solution vector, to be prolonged * @param[out] z_fine the solution vector to store the prolonged solution into * @param[in,out] coarsening_data information for coarsening * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 45c89cd29..86cf798b1 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -135,6 +135,7 @@ using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_controller hpcg_desc, DBGStream >; using hpcg_data_t = typename hpcg_runner_t::HPCGInputType; +// allow DBGStream to print grb::Vector's in a lazy way (i.e., no code generated if deactivated) struct dotter : grb::utils::telemetry::OutputStreamLazy { const grb::Vector< IOType > & v; dotter( const grb::Vector< IOType > & _v ) : v( _v ) {} @@ -151,16 +152,16 @@ static inline DBGStream & operator<<( DBGStream & stream, const grb::Vector< IOT return stream << dotter( v ); } +// various algebraic zeros static const IOType io_zero = Ring().template getZero< IOType >(); static const NonzeroType nz_zero = Ring().template getZero< NonzeroType >(); static const InputType input_zero = Ring().template getZero< InputType >(); static const ResidualType residual_zero = Ring().template getZero< ResidualType >(); +// input/output structure (serializable for distributed execution), +// with the parameters for the HPCG simulation static constexpr size_t MAX_CSV_PATH_LENGTH = 255; -/** - * Container for the parameters for the HPCG simulation. - */ struct simulation_input { // physical parameters for the multi-grid size_t nx, ny, nz; @@ -186,9 +187,6 @@ struct simulation_input { simulation_input( const simulation_input & ) = default; }; -/** - * Container for test outputs. 
- */ struct output { RC error_code = SUCCESS; size_t inner_test_repetitions = 0; @@ -199,6 +197,7 @@ struct output { }; #ifdef HPCG_PRINT_SYSTEM +// routine to print the system matrices static void print_system( const std::vector< std::unique_ptr< mg_data_t > > & system_levels, const std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels ) { assert( spmd<>::nprocs() == 1 ); // distributed printin of system not implemented @@ -218,7 +217,8 @@ static void print_system( const std::vector< std::unique_ptr< mg_data_t > > & sy * This routine is algorithm-agnositc, as long as the constructors of the data types meet the requirements * explained in \ref multigrid_allocate_data(). */ -static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t > > & system_levels, +static void allocate_system_structures( + std::vector< std::unique_ptr< mg_data_t > > & system_levels, std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, std::unique_ptr< hpcg_data_t > & cg_system_data, @@ -261,7 +261,8 @@ static void allocate_system_structures( std::vector< std::unique_ptr< mg_data_t * Builds and initializes a 3D system for an HPCG simulation according to the given 3D system sizes. * It allocates the data structures and populates them according to the algorithms chosen for HPCG. 
*/ -static void build_3d_system( std::vector< std::unique_ptr< mg_data_t > > & system_levels, +static void build_3d_system( + std::vector< std::unique_ptr< mg_data_t > > & system_levels, std::vector< std::unique_ptr< coarsening_data_t > > & coarsener_levels, std::vector< std::unique_ptr< smoothing_data_t > > & smoother_levels, std::unique_ptr< hpcg_data_t > & cg_system_data, @@ -354,7 +355,6 @@ void grbProgram( const simulation_input & in, struct output & out ) { dist_controller_t dist( pid == 0 ); // separate thousands when printing integers class IntegerSeparation : public std::numpunct< char > { - // protected: char do_thousands_sep() const override { return '\''; } @@ -379,7 +379,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { dbg_controller_t dbg_controller( pid == 0 ); DBGStream dbg_stream( dbg_controller, std::cout ); - // define the main HPCG runner and initialize the options of its components + // define the main runners and initialize the options of its components coarsener_runner_t coarsener; smoother_runner_t smoother; smoother.presmoother_steps = smoother.postsmoother_steps = in.smoother_steps; @@ -406,12 +406,15 @@ void grbProgram( const simulation_input & in, struct output & out ) { Vector< IOType > & x = hpcg_state->x; Vector< NonzeroType > & b = hpcg_state->b; - RC rc = SUCCESS; // set vectors as from standard HPCG benchmark - set( x, 1.0 ); - set( b, nz_zero ); + RC rc = set( x, 1.0 ); + ASSERT_RC_SUCCESS( rc ); + rc = set( b, nz_zero ); + ASSERT_RC_SUCCESS( rc ); rc = grb::mxv( b, A, x, Ring() ); - set( x, io_zero ); + ASSERT_RC_SUCCESS( rc ); + rc = set( x, io_zero ); + ASSERT_RC_SUCCESS( rc ); #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { @@ -474,11 +477,12 @@ void grbProgram( const simulation_input & in, struct output & out ) { logger << TEXT_HIGHLIGHT << "repetitions,average time (ms): " << out.inner_test_repetitions << ", " << out.times.useful << std::endl; + // restore previous output options std::cout.imbue( 
old_locale ); // start postamble timer.reset(); - // set error code + // set error code to caller out.error_code = rc; grb::set( b, 1.0 ); @@ -521,9 +525,9 @@ int main( int argc, char ** argv ) { thcout << "System max coarsening levels " << sim_in.max_coarsening_levels << std::endl; thcout << "Test repetitions: " << sim_in.inner_test_repetitions << std::endl; thcout << "Max iterations: " << sim_in.max_iterations << std::endl; - thcout << "Direct launch: " << std::boolalpha << sim_in.evaluation_run + thcout << "Is evaluation run: " << std::boolalpha << sim_in.evaluation_run << std::noboolalpha << std::endl; - thcout << "No conditioning: " << std::boolalpha << sim_in.no_preconditioning + thcout << "Conditioning: " << std::boolalpha << !sim_in.no_preconditioning << std::noboolalpha << std::endl; thcout << "Smoother steps: " << sim_in.smoother_steps << std::endl; thcout << "Test outer iterations: " << test_outer_iterations << std::endl; @@ -650,7 +654,7 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration } } - // check output CSVs + // check output CSV file names size_t len = std::strlen( hpcg_csv ); if( ( sim_in.hpcg_log = len > 0 ) ) { if( len > MAX_CSV_PATH_LENGTH ) { From b58370041890c85d7cd755b0c8fa92e96795cc7d Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Mon, 27 Feb 2023 08:09:00 +0000 Subject: [PATCH 24/28] implementing RBGS with foldl + eWiseApply instead of eWiseLambda --- .../multigrid/red_black_gauss_seidel.hpp | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index d86b2382b..f004610f4 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -80,6 +80,7 @@ namespace grb { using NonzeroType = typename SmootherTypes::NonzeroType; using Ring = typename 
SmootherTypes::Ring; using Minus = typename SmootherTypes::Minus; + using Divide = typename SmootherTypes::Divide; using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external input structure using SmootherDataType = SmootherData< IOType >; ///< smoothing information and temporary variables (per MG level) @@ -89,6 +90,8 @@ namespace grb { std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, ///< the smoothing data (finest first) Ring ring; ///< the algebraic ring + Minus minus; + Divide divide; static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring operator with default values" ); @@ -161,17 +164,31 @@ namespace grb { // Replace below with masked calls: // z[mask] = r[mask] - smoother_temp[mask] + z[mask] .* diagonal[mask] // z[mask] = z[maks] ./ diagonal[mask] + +// by default use foldl()'s, although eWiseLambda() might be more performing +// TODO: leave this choice for future experimentation +#if defined(RBGS_EWL) + Ring & ri = ring; + Minus & mi = minus; + Divide & di = divide; + ret = ret ? 
ret : grb::eWiseLambda( - [ &z, &r, &smoother_temp, &color_mask, &A_diagonal ]( const size_t i ) { - // if the mask was properly initialized, the check on the mask value is unnecessary; - // if( color_mask[ i ] ) { + [ &z, &r, &smoother_temp, &color_mask, &A_diagonal , &ri, &mi, &di ]( const size_t i ) { IOType d = A_diagonal[ i ]; - IOType v = r[ i ] - smoother_temp[ i ] + z[ i ] * d; - z[ i ] = v / d; - // } + IOType v; + ri.getMultiplicativeOperator().apply( z[ i ], d, v ); + ri.getAdditiveOperator().apply( v, r[ i ], v ); + mi.apply( v, smoother_temp[ i ], v ); + di.apply( v, d, z[ i ] ); }, color_mask, z, r, smoother_temp, A_diagonal ); +#else + grb::foldl( z, color_mask, A_diagonal, ring.getMultiplicativeOperator() ); + grb::foldl( z, color_mask, smoother_temp, minus ); + grb::foldl( z, color_mask, r, ring.getAdditiveOperator() ); + grb::foldl( z, color_mask, A_diagonal, divide ); +#endif assert( ret == SUCCESS ); return ret; } From be240cb74173fbbabcb133dd685f3e44fd4206b9 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 3 Mar 2023 15:49:08 +0100 Subject: [PATCH 25/28] using new Stopwatch facilities --- tests/smoke/hpcg.cpp | 66 +++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index 86cf798b1..dac62457b 100644 --- a/tests/smoke/hpcg.cpp +++ b/tests/smoke/hpcg.cpp @@ -43,7 +43,6 @@ #include #include #include -#include #include #include @@ -135,10 +134,14 @@ using hpcg_runner_t = MultiGridCGRunner< HPCGTypes, mg_runner_t, hpcg_controller hpcg_desc, DBGStream >; using hpcg_data_t = typename hpcg_runner_t::HPCGInputType; +// Stopwatch type, to measure various setup phases +using Stw = utils::telemetry::ActiveStopwatch; + + // allow DBGStream to print grb::Vector's in a lazy way (i.e., no code generated if deactivated) -struct dotter : grb::utils::telemetry::OutputStreamLazy { +struct dotter { const grb::Vector< IOType > & v; - dotter( const grb::Vector< 
IOType > & _v ) : v( _v ) {} + ResidualType operator()() const { Ring ring; ResidualType r = 0; @@ -149,7 +152,7 @@ struct dotter : grb::utils::telemetry::OutputStreamLazy { static inline DBGStream & operator<<( DBGStream & stream, const grb::Vector< IOType > & v ) { stream << std::setprecision( 7 ); - return stream << dotter( v ); + return stream << DBGStream::makeLazy( dotter{ v } ); } // various algebraic zeros @@ -226,19 +229,17 @@ static void allocate_system_structures( const mg_controller_t & mg_controller, DistStream & logger ) { - grb::utils::Timer timer; + Stw timer; hpcg_data_t * data = new hpcg_data_t( mg_sizes[ 0 ] ); cg_system_data = std::unique_ptr< hpcg_data_t >( data ); logger << "allocating data for the MultiGrid simulation..."; - timer.reset(); + timer.start(); multigrid_allocate_data( system_levels, coarsener_levels, smoother_levels, mg_sizes, mg_controller ); - double time = timer.time(); - logger << " time (ms) " << time << std::endl; + logger << " time (ms) " << Stw::nano2Milli( timer.restart() ) << std::endl; // zero all vectors logger << "zeroing all vectors..."; - timer.reset(); grb::RC rc = data->init_vectors( io_zero ); ASSERT_RC_SUCCESS( rc ); std::for_each( system_levels.begin(), system_levels.end(), @@ -253,8 +254,7 @@ static void allocate_system_structures( []( std::unique_ptr< smoothing_data_t > & s ) { ASSERT_RC_SUCCESS( s->init_vectors( io_zero ) ); } ); - time = timer.time(); - logger << " time (ms) " << time << std::endl; + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; } /** @@ -272,18 +272,17 @@ static void build_3d_system( ) { constexpr size_t DIMS = 3; using builder_t = grb::algorithms::HPCGSystemBuilder< DIMS, coord_t, NonzeroType >; - grb::utils::Timer timer; + Stw timer; HPCGSystemParams< DIMS, NonzeroType > params = { { in.nx, in.ny, in.nz }, HALO_RADIUS, SYSTEM_DIAG_VALUE, SYSTEM_NON_DIAG_VALUE, PHYS_SYSTEM_SIZE_MIN, in.max_coarsening_levels, 2 }; std::vector< builder_t > mg_generators; logger 
<< "building HPCG generators for " << ( in.max_coarsening_levels + 1 ) << " levels..."; - timer.reset(); + timer.start(); // construct the builder_t generator for each grid level, which depends on the system physics hpcg_build_multigrid_generators( params, mg_generators ); - double time = timer.time(); - logger << " time (ms) " << time << std::endl; + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; logger << "built HPCG generators for " << mg_generators.size() << " levels" << std::endl; // extract the size for each level @@ -310,24 +309,21 @@ static void build_3d_system( } logger << sizes[ DIMS - 1 ] << std::endl; logger << " populating system matrix: "; - timer.reset(); + timer.start(); grb::RC rc = hpcg_populate_system_matrix( mg_generators[ i ], system_levels.at( i )->A, logger ); - time = timer.time(); ASSERT_RC_SUCCESS( rc ); - logger << " time (ms) " << time << std::endl; + logger << " time (ms) " << Stw::nano2Milli( timer.restart() ) << std::endl; logger << " populating smoothing data: "; - timer.reset(); rc = hpcg_populate_smoothing_data( mg_generators[ i ], *smoother_levels[ i ], logger ); - time = timer.time(); + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; ASSERT_RC_SUCCESS( rc ); - logger << " time (ms) " << time << std::endl; if( i > 0 ) { logger << " populating coarsening data: "; - timer.reset(); + timer.start(); if( ! 
in.use_average_coarsener ) { rc = hpcg_populate_coarsener( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); @@ -335,9 +331,8 @@ static void build_3d_system( rc = hpcg_populate_coarsener_avg( mg_generators[ i - 1 ], mg_generators[ i ], *coarsener_levels[ i - 1 ] ); } - time = timer.time(); + logger << " time (ms) " << Stw::nano2Milli( timer.stop() ) << std::endl; ASSERT_RC_SUCCESS( rc ); - logger << " time (ms) " << time << std::endl; } } } @@ -349,7 +344,7 @@ static void build_3d_system( void grbProgram( const simulation_input & in, struct output & out ) { // get user process ID const size_t pid = spmd<>::pid(); - grb::utils::Timer timer; + Stw timer; // standard logger: active only on master node dist_controller_t dist( pid == 0 ); @@ -389,12 +384,11 @@ void grbProgram( const simulation_input & in, struct output & out ) { hpcg_runner.tolerance = residual_zero; hpcg_runner.with_preconditioning = ! in.no_preconditioning; - timer.reset(); + timer.start(); // build the entire multi-grid system build_3d_system( mg_runner.system_levels, coarsener.coarsener_levels, smoother.levels, hpcg_state, in, mg_controller, logger ); - double input_duration = timer.time(); - logger << "input generation time (ms): " << input_duration << std::endl; + logger << "input generation time (ms): " << Stw::nano2Milli( timer.restart() ) << std::endl; #ifdef HPCG_PRINT_SYSTEM if( pid == 0 ) { @@ -423,18 +417,16 @@ void grbProgram( const simulation_input & in, struct output & out ) { } #endif - out.times.preamble = timer.time(); + out.times.preamble = Stw::nano2Milli( timer.restart() ); mg_data_t & grid_base = *mg_runner.system_levels[ 0 ]; // do a cold run to warm the system up logger << TEXT_HIGHLIGHT << "beginning cold run..." 
<< std::endl; hpcg_runner.max_iterations = 1; - timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); - double iter_duration = timer.time(); + logger << " time (ms): " << Stw::nano2Milli( timer.restart() ) << std::endl; ASSERT_RC_SUCCESS( rc ); - logger << " time (ms): " << iter_duration << std::endl; // restore CG options to user-given values hpcg_runner.max_iterations = in.max_iterations; @@ -445,16 +437,14 @@ void grbProgram( const simulation_input & in, struct output & out ) { // initialize CSV writers (if activated) hpcg_csv_t hpcg_csv( hpcg_controller, { "repetition", "time" } ); mg_csv_t mg_csv( mg_controller, { "repetition", "level", "mg time", "smoother time" } ); + timer.reset(); // do benchmark for( size_t i = 0; i < in.inner_test_repetitions; ++i ) { rc = set( x, io_zero ); ASSERT_RC_SUCCESS( rc ); logger << TEXT_HIGHLIGHT << "beginning iteration: " << i << std::endl; - timer.reset(); rc = hpcg_runner( grid_base, *hpcg_state, out.cg_out ); - iter_duration = timer.time(); - out.times.useful += iter_duration; ASSERT_RC_SUCCESS( rc ); hpcg_csv.add_line( i, hpcg_runner.getElapsedNano() ); logger << "repetition,duration (ns): " << hpcg_csv.last_line() << std::endl; @@ -468,6 +458,8 @@ void grbProgram( const simulation_input & in, struct output & out ) { out.inner_test_repetitions++; } + timer.stop(); + out.times.useful += Stw::nano2Milli( timer.getElapsedNano() ); if( in.evaluation_run ) { // get maximum execution time among processes rc = collectives<>::reduce( out.times.useful, 0, operators::max< double >() ); @@ -481,7 +473,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { std::cout.imbue( old_locale ); // start postamble - timer.reset(); + timer.restart(); // set error code to caller out.error_code = rc; @@ -493,7 +485,6 @@ void grbProgram( const simulation_input & in, struct output & out ) { // output out.pinnedVector.reset( new PinnedVector< NonzeroType >( x, SEQUENTIAL ) ); // finish timing - 
out.times.postamble = timer.time(); // write measurements into CSV files if( in.hpcg_log ) { @@ -502,6 +493,7 @@ void grbProgram( const simulation_input & in, struct output & out ) { if( in.mg_log ) { mg_csv.write_to_file( in.mg_csv.data() ); } + out.times.postamble = Stw::nano2Milli( timer.stop() ); } #define thcout ( std::cout << TEXT_HIGHLIGHT ) From 03783cf6ce9720dd05ca76351148cf120d68748f Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Thu, 30 Mar 2023 18:19:19 +0200 Subject: [PATCH 26/28] removing missing (and useless) header from nonblocking matrix --- include/graphblas/nonblocking/matrix.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/graphblas/nonblocking/matrix.hpp b/include/graphblas/nonblocking/matrix.hpp index 251e2037d..5554d78ae 100644 --- a/include/graphblas/nonblocking/matrix.hpp +++ b/include/graphblas/nonblocking/matrix.hpp @@ -50,7 +50,6 @@ #include #include -#include #include #include From 8fe366d6c5c9e6c80c2beddabda7798939f7d9d1 Mon Sep 17 00:00:00 2001 From: Alberto Scolari Date: Fri, 31 Mar 2023 12:25:10 +0200 Subject: [PATCH 27/28] re-flowing long lines --- .../algorithms/hpcg/average_coarsener.hpp | 8 +++--- .../algorithms/hpcg/greedy_coloring.hpp | 3 ++- .../hpcg/single_point_coarsener.hpp | 8 +++--- .../algorithms/hpcg/system_building_utils.hpp | 6 +++-- .../algorithms/multigrid/multigrid_cg.hpp | 16 +++++------ .../algorithms/multigrid/multigrid_data.hpp | 14 +++++----- .../multigrid/multigrid_v_cycle.hpp | 4 +-- .../multigrid/red_black_gauss_seidel.hpp | 27 +++++++++++-------- .../multigrid/single_matrix_coarsener.hpp | 3 ++- .../multigrid/dynamic_vector_storage.hpp | 5 ++-- .../halo_matrix_generator_iterator.hpp | 8 +++--- .../linearized_halo_ndim_iterator.hpp | 15 +++++++---- .../multigrid/linearized_halo_ndim_system.hpp | 12 +++++---- .../multigrid/linearized_ndim_iterator.hpp | 5 +++- .../multigrid/linearized_ndim_system.hpp | 6 ++--- .../graphblas/utils/telemetry/Stopwatch.hpp | 3 ++- tests/smoke/hpcg.cpp | 3 ++- 
17 files changed, 88 insertions(+), 58 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/average_coarsener.hpp b/include/graphblas/algorithms/hpcg/average_coarsener.hpp index eb3853c61..983e5ad8f 100644 --- a/include/graphblas/algorithms/hpcg/average_coarsener.hpp +++ b/include/graphblas/algorithms/hpcg/average_coarsener.hpp @@ -341,9 +341,11 @@ namespace grb { grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > _finer_subspace; - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps;///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes + /// + /// array of steps, i.e. how much each column coordinate (finer system) must be + /// incremented when incrementing the row coordinates; it is the ratio between + /// #finer_sizes and row_generator#physical_sizes + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps; }; } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp index 24bb1e1e4..366465c41 100644 --- a/include/graphblas/algorithms/hpcg/greedy_coloring.hpp +++ b/include/graphblas/algorithms/hpcg/greedy_coloring.hpp @@ -82,7 +82,8 @@ namespace grb { bool reorder_rows_per_color = false ) { CoordType nrows = system.system_size(); - row_colors.insert( row_colors.begin(), nrows, nrows ); // value `nrows' means `uninitialized'; initialized colors go from 0 to nrow-1 + // value `nrows' means `uninitialized'; initialized colors go from 0 to nrow-1 + row_colors.insert( row_colors.begin(), nrows, nrows ); CoordType totalColors = 1; row_colors[ 0 ] = 0; // first point gets color 0 diff --git 
a/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp index 92ef47263..e412a630c 100644 --- a/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp +++ b/include/graphblas/algorithms/hpcg/single_point_coarsener.hpp @@ -312,9 +312,11 @@ namespace grb { const grb::utils::multigrid::LinearizedNDimSystem< CoordType, grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; - ArrayType steps; ///< array of steps, i.e. how much each column coordinate (finer system) must be - //// incremented when incrementing the row coordinates; is is the ration between - //// #finer_sizes and row_generator#physical_sizes + /// + /// array of steps, i.e. how much each column coordinate (finer system) must be + /// incremented when incrementing the row coordinates; it is the ratio between + /// #finer_sizes and row_generator#physical_sizes + ArrayType steps; }; } // namespace algorithms diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index ddf9e45a5..6ee46c7b3 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -365,7 +365,8 @@ namespace grb { std::vector< size_t >::const_iterator begin = rows.cbegin(); std::vector< size_t >::const_iterator end = rows.cend(); // partition_iteration_range( rows.size(), begin, end ); - grb::RC rc = grb::buildVectorUnique( output_mask, begin, end, true_iter< size_t >( 0 ), true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); + grb::RC rc = grb::buildVectorUnique( output_mask, begin, end, + true_iter< size_t >( 0 ), true_iter< size_t >( rows.size() ), IOMode::SEQUENTIAL ); if( rc != SUCCESS ) { std::cerr << "error while creating output mask for color " << i << ": " << toString( rc ) << std::endl; return rc; @@ -434,7 +435,8 @@ namespace grb { } logger << "- found " << color_counters.size() 
<< " colors," << " generating color masks..."; - return internal::hpcg_build_static_color_masks( system_generator.system_size(), per_color_rows, smoothing_info.color_masks ); + return internal::hpcg_build_static_color_masks( system_generator.system_size(), + per_color_rows, smoothing_info.color_masks ); } } // namespace algorithms diff --git a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp index cd1761589..5fa1a3772 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_cg.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_cg.hpp @@ -134,14 +134,14 @@ namespace grb { static_assert( std::is_move_constructible< MultiGridRunnerType >::value, "cannot construct the Multi-Grid runner by move" ); - Ring ring; ///< algebraic ring to be used - Minus minus; ///< minus operator to be used - bool with_preconditioning = true; ///< whether preconditioning is enabled - size_t max_iterations = 10; ///< max number of allowed iterations for CG: after that, the solver is halted - ///< and the result achieved so far returned - ResidualType tolerance = ring.template getZero< ResidualType >(); ///< ratio between initial residual and current residual that halts the solver - ///< if reached, for the solution is to be considered "good enough" - + Ring ring; ///< algebraic ring to be used + Minus minus; ///< minus operator to be used + bool with_preconditioning = true; ///< whether preconditioning is enabled + size_t max_iterations = 10; ///< max number of allowed iterations for CG: + ///< after that, the solver is halted and the result achieved so far returned + ResidualType tolerance = ring.template getZero< ResidualType >(); ///< ratio + ///< between initial residual and current residual that halts the solver + ///< if reached, for the solution is to be considered "good enough" MultiGridRunnerType & mg_runner; ///< runner object for MG DbgOutputStreamType dbg_logger; ///< logger to trace execution diff 
--git a/include/graphblas/algorithms/multigrid/multigrid_data.hpp b/include/graphblas/algorithms/multigrid/multigrid_data.hpp index 4f0d0eed4..a0a76191e 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_data.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_data.hpp @@ -55,13 +55,15 @@ namespace grb { typename TelControllerType > struct MultiGridData { - grb::utils::telemetry::Stopwatch< TelControllerType > mg_stopwatch; ///< stopwatch to measure the execution time in MG - grb::utils::telemetry::Stopwatch< TelControllerType > sm_stopwatch; ///< stopwatch to measure the execution time in the smoother - const size_t level; ///< level of the grid (0 for the finest physical system) - const size_t system_size; ///< size of the system, i.e. side of the #A system matrix + grb::utils::telemetry::Stopwatch< TelControllerType > mg_stopwatch; ///< stopwatch + ///< to measure the execution time in MG + grb::utils::telemetry::Stopwatch< TelControllerType > sm_stopwatch; ///< stopwatch + ///< to measure the execution time in the smoother + const size_t level; ///< level of the grid (0 for the finest physical system) + const size_t system_size; ///< size of the system, i.e. side of the #A system matrix grb::Matrix< NonzeroType > A; ///< system matrix - grb::Vector< IOType > z; ///< multi-grid solution - grb::Vector< IOType > r; ///< residual + grb::Vector< IOType > z; ///< multi-grid solution + grb::Vector< IOType > r; ///< residual /** * Construct a new multigrid data object from level information and system size. 
diff --git a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp index 1a036c1cc..bd9a393a4 100644 --- a/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp +++ b/include/graphblas/algorithms/multigrid/multigrid_v_cycle.hpp @@ -91,8 +91,8 @@ namespace grb { DbgOutputStreamType dbg_logger; ///< logger to trace execution std::vector< std::unique_ptr< MultiGridInputType > > system_levels; ///< levels of the grid (finest first) - Ring ring; ///< algebraic ring - Minus minus; ///< minus operator + Ring ring; ///< algebraic ring + Minus minus; ///< minus operator // operator to extract the reference out of an std::unique_ptr object struct __extractor { diff --git a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp index f004610f4..3b558e9f1 100644 --- a/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp +++ b/include/graphblas/algorithms/multigrid/red_black_gauss_seidel.hpp @@ -81,15 +81,17 @@ namespace grb { using Ring = typename SmootherTypes::Ring; using Minus = typename SmootherTypes::Minus; using Divide = typename SmootherTypes::Divide; - using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external input structure - using SmootherDataType = SmootherData< IOType >; ///< smoothing information and temporary variables (per MG level) - - size_t presmoother_steps = 1UL; ///< number of pre-smoother steps - size_t postsmoother_steps = 1UL; ///< number of post-smoother steps - size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level + using SmootherInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< external + ///< input structure + using SmootherDataType = SmootherData< IOType >; ///< smoothing information + ///< and temporary variables (per MG level) + + size_t presmoother_steps = 1UL; ///< number of 
pre-smoother steps + size_t postsmoother_steps = 1UL; ///< number of post-smoother steps + size_t non_recursive_smooth_steps = 1UL; ///< number of smoother steps for the last grid level std::vector< std::unique_ptr< SmootherDataType > > levels; ///< for each grid level, - ///< the smoothing data (finest first) - Ring ring; ///< the algebraic ring + ///< the smoothing data (finest first) + Ring ring; ///< the algebraic ring Minus minus; Divide divide; @@ -136,7 +138,8 @@ namespace grb { /** * Runs a single step of Red-Black Gauss-Seidel for a specific color. * - * @param[in,out] data structure with external containers, corresponsign to an MG level: vector to smooth, system matrix, residual + * @param[in,out] data structure with external containers, corresponding to an MG level: + * vector to smooth, system matrix, residual * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise @@ -174,7 +177,8 @@ namespace grb { ret = ret ? ret : grb::eWiseLambda( - [ &z, &r, &smoother_temp, &color_mask, &A_diagonal , &ri, &mi, &di ]( const size_t i ) { + [ &z, &r, &smoother_temp, &color_mask, &A_diagonal , + &ri, &mi, &di ]( const size_t i ) { IOType d = A_diagonal[ i ]; IOType v; ri.getMultiplicativeOperator().apply( z[ i ], d, v ); @@ -203,7 +207,8 @@ namespace grb { * and no check is performed to ensure these assumptions hold. Hence, it is up to user logic * to pass correct coloring information. Otherwise, \b no guarantees hold on the result.
* - * @param[in,out] data structure with external containers, corresponsign to an MG level: vector to smooth, system matrix, residual + * @param[in,out] data structure with external containers, corresponding to an MG level: + * vector to smooth, system matrix, residual * @param[in,out] smoothing_info smoothing-specific information: temporary vectors, color masks * @return grb::RC::SUCCESS if the algorithm could correctly terminate, the error code of the first * unsuccessful operation otherwise diff --git a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp index 3b2379802..40f8163f5 100644 --- a/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp +++ b/include/graphblas/algorithms/multigrid/single_matrix_coarsener.hpp @@ -83,7 +83,8 @@ namespace grb { using Minus = typename CoarsenerTypes::Minus; using MultiGridInputType = MultiGridData< IOType, NonzeroType, TelControllerType >; ///< input data from MG - using CoarseningDataType = CoarseningData< IOType, NonzeroType >; ///< internal data with coarsening information + using CoarseningDataType = CoarseningData< IOType, NonzeroType >; ///< internal data + ///< with coarsening information static_assert( std::is_default_constructible< Ring >::value, "cannot construct the Ring with default values" ); diff --git a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp index fff89b6db..0d6250aae 100644 --- a/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp +++ b/include/graphblas/utils/multigrid/dynamic_vector_storage.hpp @@ -32,8 +32,9 @@ namespace grb { namespace multigrid { /** - * Array with fixed size (i.e. decided at object creation) allocated on the heap with an interface compliant - * to what other classes in the geometry namespace expect, like storage() and dimensions() methods. + * Array with fixed size (i.e.
decided at object creation) allocated on the heap + * with an interface compliant to what other classes in the geometry namespace expect, + * like storage() and dimensions() methods. * * It describes a vector of dimensions #dimensions(). * diff --git a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp index e51d7d6df..ebda27890 100644 --- a/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp +++ b/include/graphblas/utils/multigrid/halo_matrix_generator_iterator.hpp @@ -69,7 +69,8 @@ namespace grb { typename ValueCallable > struct HaloMatrixGeneratorIterator { - static_assert( std::is_copy_constructible< ValueCallable >::value, "ValueCallable must be copy-constructible" ); + static_assert( std::is_copy_constructible< ValueCallable >::value, + "ValueCallable must be copy-constructible" ); using RowIndexType = CoordType; ///< numeric type of rows using ColumnIndexType = CoordType; @@ -145,8 +146,9 @@ namespace grb { * Increments the iterator by moving coordinates to the next (row, column) to iterate on. * * This operator internally increments the columns coordinates until wrap-around, when it increments - * the row coordinates and resets the column coordinates to the first possible columns; this column coordinate - * depends on the row coordinates according to the dimensions iteration order and on the parameter \p halo. + * the row coordinates and resets the column coordinates to the first possible columns; + * this column coordinate depends on the row coordinates according to the dimensions + * iteration order and on the parameter \p halo. 
* * @return HaloMatrixGeneratorIterator& \c this object, with the updated state */ diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp index 3a5047277..6c020c39d 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_iterator.hpp @@ -262,7 +262,8 @@ namespace grb { throw std::out_of_range( "the system has no more elements" ); } size_t num_neighbours = this->_neighbors_subspace.system_size(); - size_t neighbour_position_offset = this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); + size_t neighbour_position_offset = + this->_neighbors_subspace.ndim_to_linear( this->_neighbor_iter->get_position() ); ++( this->_point._element_iter ); this->on_element_advance(); this->_point._position -= neighbour_position_offset; @@ -297,7 +298,8 @@ namespace grb { throw std::range_error( "neighbor linear value beyond system" ); } VectorType final_element( DIMS ); - size_t neighbor_index = ( this->_point._system->neighbour_linear_to_element( final_position, final_element ) ); + size_t neighbor_index = + this->_point._system->neighbour_linear_to_element( final_position, final_element ); this->_point._element_iter = VectorIteratorType( *this->_point._system, final_element.cbegin() ); this->_point._position = final_position; @@ -319,7 +321,8 @@ namespace grb { * It throws if the result cannot be stored as a difference_type variable. 
*/ difference_type operator-( const SelfType & other ) const { - return grb::utils::compute_signed_distance< difference_type, SizeType >( _point.get_position(), other._point.get_position() ); + return grb::utils::compute_signed_distance< difference_type, SizeType >( _point.get_position(), + other._point.get_position() ); } /** @@ -349,7 +352,8 @@ namespace grb { */ inline void on_neighbor_iter_update() { for( size_t i = 0; i < DIMS; i++ ) { - this->_point._neighbor[ i ] = this->_neighbors_start[ i ] + this->_neighbor_iter->get_position()[ i ]; + this->_point._neighbor[ i ] = this->_neighbors_start[ i ] + + this->_neighbor_iter->get_position()[ i ]; } } @@ -360,7 +364,8 @@ namespace grb { void on_element_update() { // reset everything VectorType neighbors_range( DIMS ); - this->_point._system->compute_neighbors_range( this->_point._element_iter->get_position(), this->_neighbors_start, neighbors_range ); + this->_point._system->compute_neighbors_range( this->_point._element_iter->get_position(), + this->_neighbors_start, neighbors_range ); // re-target _neighbors_subspace this->_neighbors_subspace.retarget( neighbors_range ); } diff --git a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp index 1ebe04b73..34e16069d 100644 --- a/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_halo_ndim_system.hpp @@ -276,7 +276,8 @@ namespace grb { size_t halo, NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > > & config_neighbors ) { - using it_type = typename NDimVector< SizeType, SizeType, DynamicVectorStorage< SizeType > >::DomainIterator; + using it_type = typename NDimVector< SizeType, SizeType, + DynamicVectorStorage< SizeType > >::DomainIterator; it_type end = config_neighbors.domain_end(); for( it_type it = config_neighbors.domain_begin(); it != end; ++it ) { size_t res = 1; @@ -300,9 +301,9 @@ namespace grb { 
* face slabs or inner slabs) * - dimension 1 (y axis) moves along "rows" within each slab, whose total number of neighbors * depends on whether the row is at the extreme sides (top or bottom of the face) or inside; - * in turn, each type of slab has different geometry (face slabs comprise mesh corners, edges and faces, - * while inner slabs comprise edges, faces and inner elements), thus resulting in 2*2 different - * configurations of dimension-1 total neighbors + * in turn, each type of slab has different geometry (face slabs comprise mesh corners, edges and + * faces, while inner slabs comprise edges, faces and inner elements), thus resulting in + * 2*2 different configurations of dimension-1 total neighbors * - dimension 0 (x axis) moves along "column" elements within each row, where the first (or last) * column has a different number of neighbors than the inner ones; here again are two configuration * for each dimension-1 configuration, leading to a total of 8 dimension-1 configurations @@ -478,7 +479,8 @@ namespace grb { halo_max_neighs = neighbors.at( halo_coords_begin ); } #ifdef _DEBUG - std::cout << "- initial halo - neighbour " << neighbor_linear << std::endl << "\th " << h << std::endl << "\thalo : "; + std::cout << "- initial halo - neighbour " << neighbor_linear + << std::endl << "\th " << h << std::endl << "\thalo : "; print_sequence( halo_coords_begin, halo_coords_end ) << std::endl; std::cout << "\thalo_max_neighs " << halo_max_neighs << std::endl; #endif diff --git a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp index 9b0e61a8a..a4ae8af5e 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_iterator.hpp @@ -84,7 +84,10 @@ namespace grb { NDimPoint( NDimPoint && ) = delete; - NDimPoint( const LinNDimSysType & _system ) noexcept : system( &_system ), coords( _system.dimensions() ) { + 
NDimPoint( const LinNDimSysType & _system ) noexcept : + system( &_system ), + coords( _system.dimensions() ) + { std::fill_n( this->coords.begin(), _system.dimensions(), 0 ); } diff --git a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp index a02a0c631..c4b62707a 100644 --- a/include/graphblas/utils/multigrid/linearized_ndim_system.hpp +++ b/include/graphblas/utils/multigrid/linearized_ndim_system.hpp @@ -226,9 +226,9 @@ namespace grb { */ void retarget( ConstVectorReference _new_sizes ) { if( _new_sizes.dimensions() != this->_sizes.dimensions() ) { - throw std::invalid_argument( - "new system must have same dimensions as previous: new " + std::to_string( _new_sizes.dimensions() ) - + ", old " + std::to_string( this->_sizes.dimensions() ) ); + throw std::invalid_argument( "new system must have same dimensions as previous: new " + + std::to_string( _new_sizes.dimensions() ) + ", old " + + std::to_string( this->_sizes.dimensions() ) ); } this->_sizes = _new_sizes; // copy this->_system_size = compute_range_product( _new_sizes.begin(), _new_sizes.end(), diff --git a/include/graphblas/utils/telemetry/Stopwatch.hpp b/include/graphblas/utils/telemetry/Stopwatch.hpp index a607a3cbd..f599ede03 100644 --- a/include/graphblas/utils/telemetry/Stopwatch.hpp +++ b/include/graphblas/utils/telemetry/Stopwatch.hpp @@ -96,7 +96,8 @@ namespace grb { typedef typename std::chrono::high_resolution_clock::time_point time_point_t; - duration_t elapsedTime; ///< measured elapsed time so far, i.e., accumulated time periods between successive calls to #start() and #stop() + duration_t elapsedTime; ///< measured elapsed time so far, i.e., + ///< accumulated time periods between successive calls to #start() and #stop() time_point_t beginning; ///< time instant of last call to #start() diff --git a/tests/smoke/hpcg.cpp b/tests/smoke/hpcg.cpp index dac62457b..e2f5644c2 100644 --- a/tests/smoke/hpcg.cpp +++ 
b/tests/smoke/hpcg.cpp @@ -629,7 +629,8 @@ static void parse_arguments( simulation_input & sim_in, size_t & outer_iteration // check sizes const size_t max_system_divider = 1 << sim_in.max_coarsening_levels; for( size_t s : { sim_in.nx, sim_in.ny, sim_in.nz } ) { - std::lldiv_t div_res = std::div( static_cast< long long >( s ), static_cast< long long >( max_system_divider ) ); + std::lldiv_t div_res = std::div( static_cast< long long >( s ), + static_cast< long long >( max_system_divider ) ); if( div_res.rem != 0 ) { std::cerr << "ERROR: system size " << s << " cannot be coarsened " << sim_in.max_coarsening_levels << " times because it is not exactly divisible" << std::endl; From bf1145562466cc2214d276909083d3cb8d9413e6 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Fri, 23 Jun 2023 11:51:36 +0200 Subject: [PATCH 28/28] Code review of average_coarsener.hpp --- .../algorithms/hpcg/average_coarsener.hpp | 609 ++++++++++-------- .../algorithms/hpcg/system_building_utils.hpp | 2 +- 2 files changed, 335 insertions(+), 276 deletions(-) diff --git a/include/graphblas/algorithms/hpcg/average_coarsener.hpp b/include/graphblas/algorithms/hpcg/average_coarsener.hpp index 983e5ad8f..41abed9e2 100644 --- a/include/graphblas/algorithms/hpcg/average_coarsener.hpp +++ b/include/graphblas/algorithms/hpcg/average_coarsener.hpp @@ -34,320 +34,379 @@ #include #include + namespace grb { + namespace algorithms { - // forward declaration - template< - size_t DIMS, - typename CoordType, - typename ValueType - > class AverageCoarsenerBuilder; - - /** - * Iterator class to generate the coarsening matrix that averages over the elements of the finer - * domain corresponding to the element of the coarser domain. - * - * The coarsening matrix averages \b all elements that are coarsened into one. 
- * - * This coarsening method requires some computation but should be relatively robust to noise - * or to partitioning strategies to parallelize the smoother (usually run before coarsening). - * - * This iterator is random-access. - * - * @tparam DIMS number of dimensions - * @tparam CoordType type storing the coordinates and the sizes - * @tparam ValueType type of the nonzero: it must be able to represent 1 / - * - */ - template< - size_t DIMS, - typename CoordType, - typename ValueType - > struct AverageGeneratorIterator { - - friend AverageCoarsenerBuilder< DIMS, CoordType, ValueType >; - - using RowIndexType = CoordType; ///< numeric type of rows - using ColumnIndexType = CoordType; - using LinearSystemType = grb::utils::multigrid::LinearizedNDimSystem< CoordType, - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > >; - using LinearSystemIterType = typename LinearSystemType::Iterator; - using SelfType = AverageGeneratorIterator< DIMS, CoordType, ValueType >; - using ArrayType = std::array< CoordType, DIMS >; - - struct _ValueGenerator { - - friend SelfType; - - _ValueGenerator( - RowIndexType i, - ColumnIndexType j, - ValueType value - ) noexcept : - _i( i ), - _j( j ), - _value( value ) {} + namespace hpcg { - _ValueGenerator( const _ValueGenerator & ) = default; + // forward declaration + template< + size_t DIMS, + typename CoordType, + typename ValueType + > + class AverageCoarsenerBuilder; - _ValueGenerator & operator=( const _ValueGenerator & ) = default; + /** + * Iterator class to generate the coarsening matrix that averages over the + * elements of the finer domain corresponding to the element of the coarser + * domain. + * + * The coarsening matrix averages \b all elements that are coarsened into + * one. + * + * This coarsening method requires some computation but should be relatively + * robust to noise or to partitioning strategies that parallelize the + * smoother (usually run before coarsening). + * + * This iterator is random-access. 
+ * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 / + * + */ + template< + size_t DIMS, + typename CoordType, + typename ValueType + > + struct AverageGeneratorIterator { - inline RowIndexType i() const { - return _i; - } - inline ColumnIndexType j() const { - return _j; - } - inline ValueType v() const { - return _value; - } + friend AverageCoarsenerBuilder< DIMS, CoordType, ValueType >; - private: - RowIndexType _i; - ColumnIndexType _j; - ValueType _value; - }; + /** Numeric type of rows */ + typedef CoordType RowIndexType; - // interface for std::random_access_iterator - using iterator_category = std::random_access_iterator_tag; - using value_type = _ValueGenerator; - using pointer = const value_type; - using reference = const value_type &; - using difference_type = typename LinearSystemIterType::difference_type; + /** Numeric type of columns */ + typedef CoordType ColumnIndexType; - AverageGeneratorIterator( const SelfType & o ) = default; + typedef typename grb::utils::multigrid::LinearizedNDimSystem< + CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > + > LinearSystemType; - AverageGeneratorIterator( SelfType && o ) = default; + typedef typename LinearSystemType::Iterator LinearSystemIterType; - SelfType & operator=( const SelfType & ) = default; + typedef AverageGeneratorIterator< DIMS, CoordType, ValueType > SelfType; - SelfType & operator=( SelfType && ) = default; + typedef std::array< CoordType, DIMS > ArrayType; - /** - * Advances \c this by 1 in constant time. 
- */ - SelfType & operator++() noexcept { - (void)++_subspace_iter; - size_t subspace_position = _subspace_iter->get_linear_position(); - // std::cout << "subspace_position " << subspace_position << std::endl; - if( subspace_position == _num_neighbors ) { - (void)++_sys_iter; + class ValueGenerator { + + friend SelfType; + + + private: + + RowIndexType _i; + + ColumnIndexType _j; + + ValueType _value; + + + public: + + ValueGenerator( + RowIndexType i, + ColumnIndexType j, + ValueType value + ) noexcept : + _i( i ), + _j( j ), + _value( value ) + {} + + ValueGenerator( const ValueGenerator & ) = default; + + ValueGenerator & operator=( const ValueGenerator & ) = default; + + inline RowIndexType i() const { + return _i; + } + + inline ColumnIndexType j() const { + return _j; + } + + inline ValueType v() const { + return _value; + } + + }; + + // interface for std::random_access_iterator + typedef std::random_access_iterator_tag iterator_category; + + typedef ValueGenerator value_type; + + typedef const value_type pointer; + + typedef const value_type & reference; + + typedef typename LinearSystemIterType::difference_type difference_type; + + AverageGeneratorIterator( const SelfType &o ) = default; + + AverageGeneratorIterator( SelfType && ) = default; + + SelfType & operator=( const SelfType & ) = default; + + SelfType & operator=( SelfType && ) = default; + + /** + * Advances \c this by 1 in constant time. + */ + SelfType & operator++() noexcept { + (void) ++_subspace_iter; + size_t subspace_position = _subspace_iter->get_linear_position(); + if( subspace_position == _num_neighbors ) { + (void) ++_sys_iter; + _subspace_iter = _finer_subspace->begin(); + } + update_coords(); + return *this; + } + + /** + * Advances \c this by \p offset in constant time. 
+ */ + SelfType & operator+=( size_t offset ) { + CoordType sub_offset = _subspace_iter->get_linear_position() + offset; + std::ldiv_t res = std::ldiv( sub_offset, _num_neighbors ); + _sys_iter += res.quot; _subspace_iter = _finer_subspace->begin(); + _subspace_iter += res.rem; + update_coords(); + return *this; } - update_coords(); - return *this; - } - /** - * Advances \c this by \p offset in constant time. - */ - SelfType & operator+=( size_t offset ) { - CoordType sub_offset = _subspace_iter->get_linear_position() + offset; - std::ldiv_t res = std::ldiv( sub_offset, _num_neighbors ); - _sys_iter += res.quot; - _subspace_iter = _finer_subspace->begin(); - _subspace_iter += res.rem; - update_coords(); - return *this; - } + /** + * Computes the difference between \c this and \p o as integer. + */ + difference_type operator-( const SelfType &o ) const { + return this->_sys_iter - o._sys_iter; + } - /** - * Computes the difference between \c this and \p o as integer. - */ - difference_type operator-( const SelfType & o ) const { - return this->_sys_iter - o._sys_iter; - } + /** + * Returns whether \c this and \p o differ. + */ + bool operator!=( const SelfType &o ) const { + return this->_sys_iter != o._sys_iter; + } - /** - * Returns whether \c this and \p o differ. - */ - bool operator!=( const SelfType & o ) const { - return this->_sys_iter != o._sys_iter; - } + /** + * Returns whether \c this and \p o are equal. + */ + bool operator==( const SelfType &o ) const { + return ! this->operator!=( o ); + } - /** - * Returns whether \c this and \p o are equal. - */ - bool operator==( const SelfType & o ) const { - return ! this->operator!=( o ); - } + reference operator*() const { + return _val; + } - reference operator*() const { - return _val; - } + pointer operator->() const { + return &_val; + } - pointer operator->() const { - return &_val; - } + /** + * Returns the current row, within the coarser system. 
+ */ + inline RowIndexType i() const { + return _val.i(); + } - /** - * Returns the current row, within the coarser system. - */ - inline RowIndexType i() const { - return _val.i(); - } + /** + * Returns the current column, within the finer system. + */ + inline ColumnIndexType j() const { + return _val.j(); + } - /** - * Returns the current column, within the finer system. - */ - inline ColumnIndexType j() const { - return _val.j(); - } + /** + * Returns the averaging weight, i.e., 1 divided by the number of finer elements coarsened into one. + */ + inline ValueType v() const { + return _val.v(); + } - /** - * Returns always 1, as the coarsening keeps the same value. - */ - inline ValueType v() const { - return _val.v(); - } - - private: - const LinearSystemType * _lin_sys; - const LinearSystemType * _finer_subspace; - const ArrayType * _steps; - CoordType _num_neighbors; - LinearSystemIterType _sys_iter; - LinearSystemIterType _subspace_iter; - value_type _val; - /** - * Construct a new AverageGeneratorIterator object starting from the LinearizedNDimSystem - * object \p system describing the \b coarser system and the \b ratios \p steps between each finer and - * the corresponding corser dimension.
- * - * @param system LinearizedNDimSystem object describing the coarser system - * @param finer_subspace LinearizedNDimSystem object describing the subspace of each element - * in the finer system - * @param steps ratios per dimension between finer and coarser system - */ - AverageGeneratorIterator( - const LinearSystemType & system, - const LinearSystemType & finer_subspace, - const ArrayType & steps - ) noexcept : - _lin_sys( &system ), - _finer_subspace( &finer_subspace ), - _steps( &steps ), - _num_neighbors( std::accumulate( steps.cbegin(), steps.cend(), 1UL, std::multiplies< CoordType >() ) ), - _sys_iter( system.begin() ), - _subspace_iter( finer_subspace.begin() ), - _val( 0, 0, static_cast< ValueType >( 1 ) / static_cast< ValueType >( _num_neighbors ) ) - { - update_coords(); - } - - void update_coords() noexcept { - _val._i = _sys_iter->get_linear_position(); - _val._j = coarse_rows_to_finer_col(); - } + private: - /** - * Returns the row coordinates converted to the finer system, to compute - * the column value. - */ - ColumnIndexType coarse_rows_to_finer_col() const noexcept { - ColumnIndexType finer = 0; - ColumnIndexType s = 1; - for( size_t i = 0; i < DIMS; i++ ) { - finer += s * _subspace_iter->get_position()[ i ]; - s *= ( *_steps )[ i ]; - finer += s * _sys_iter->get_position()[ i ]; - s *= _lin_sys->get_sizes()[ i ]; + const LinearSystemType * _lin_sys; + const LinearSystemType * _finer_subspace; + const ArrayType * _steps; + CoordType _num_neighbors; + LinearSystemIterType _sys_iter; + LinearSystemIterType _subspace_iter; + value_type _val; + + /** + * Construct a new AverageGeneratorIterator object starting from the + * LinearizedNDimSystem object \p system describing the \b coarser system + * and the \b ratios \p steps between each finer and the corresponding + * coarser dimension. 
+ * + * @param system LinearizedNDimSystem object describing the coarser system + * @param finer_subspace LinearizedNDimSystem object describing the subspace + * of each element in the finer system + * @param steps Ratios per dimension between finer and coarser system + */ + AverageGeneratorIterator( + const LinearSystemType &system, + const LinearSystemType &finer_subspace, + const ArrayType &steps + ) noexcept : + _lin_sys( &system ), + _finer_subspace( &finer_subspace ), + _steps( &steps ), + _num_neighbors( std::accumulate( steps.cbegin(), steps.cend(), 1UL, + std::multiplies< CoordType >() ) ), + _sys_iter( system.begin() ), + _subspace_iter( finer_subspace.begin() ), + _val( 0, 0, static_cast< ValueType >( 1 ) / + static_cast< ValueType >( _num_neighbors ) ) + { + update_coords(); + } + + void update_coords() noexcept { + _val._i = _sys_iter->get_linear_position(); + _val._j = coarse_rows_to_finer_col(); } - return finer; - } - }; - - /** - * Builder object to create iterators that generate an averaging-coarsening matrix. - * - * It is a facility to generate beginning and end iterators and abstract the logic away from users. - * - * @tparam DIMS number of dimensions - * @tparam CoordType type storing the coordinates and the sizes - * @tparam ValueType type of the nonzero: it must be able to represent 1 (the value to sample - * the finer value) - */ - template< - size_t DIMS, - typename CoordType, - typename ValueType - > class AverageCoarsenerBuilder { - public: - using ArrayType = std::array< CoordType, DIMS >; - using Iterator = AverageGeneratorIterator< DIMS, CoordType, ValueType >; - using SelfType = AverageCoarsenerBuilder< DIMS, CoordType, ValueType >; + + /** + * Returns the row coordinates converted to the finer system, to compute + * the column value. 
+ */ + ColumnIndexType coarse_rows_to_finer_col() const noexcept { + ColumnIndexType finer = 0; + ColumnIndexType s = 1; + for( size_t i = 0; i < DIMS; i++ ) { + finer += s * _subspace_iter->get_position()[ i ]; + s *= ( *_steps )[ i ]; + finer += s * _sys_iter->get_position()[ i ]; + s *= _lin_sys->get_sizes()[ i ]; + } + return finer; + } + + }; /** - * Construct a new AverageCoarsenerBuilder object from the sizes of finer system - * and those of the coarser system; finer sizes must be an exact multiple of coarser sizes, - * otherwise an exception is raised. + * Builder object to create iterators that generate an averaging-coarsening + * matrix. + * + * It is a facility to generate beginning and end iterators and abstract the + * logic away from users. + * + * @tparam DIMS number of dimensions + * @tparam CoordType type storing the coordinates and the sizes + * @tparam ValueType type of the nonzero: it must be able to represent 1 + * (the value to sample the finer value) */ - AverageCoarsenerBuilder( - const ArrayType & _finer_sizes, - const ArrayType & _coarser_sizes - ) : - system( _coarser_sizes.begin(), _coarser_sizes.end() ), - _finer_subspace( _coarser_sizes.cbegin(), _coarser_sizes.cend() ), - steps( DIMS ) - { - for( size_t i = 0; i < DIMS; i++ ) { - // finer size MUST be an exact multiple of coarser_size - std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); - if( ratio.quot < 2 || ratio.rem != 0 ) { - throw std::invalid_argument( std::string( "finer size of dimension " ) - + std::to_string( i ) + std::string( "is not an exact multiple of coarser size" ) ); + template< + size_t DIMS, + typename CoordType, + typename ValueType + > + class AverageCoarsenerBuilder { + + public: + + typedef std::array< CoordType, DIMS > ArrayType; + typedef AverageGeneratorIterator< DIMS, CoordType, ValueType > Iterator; + typedef AverageCoarsenerBuilder< DIMS, CoordType, ValueType > SelfType; + + /** + * Construct a new AverageCoarsenerBuilder object 
from the sizes of finer + * system and those of the coarser system; finer sizes must be an exact + * multiple of coarser sizes, otherwise an exception is raised. + */ + AverageCoarsenerBuilder( + const ArrayType &_finer_sizes, + const ArrayType &_coarser_sizes + ) : + system( _coarser_sizes.begin(), _coarser_sizes.end() ), + _finer_subspace( _coarser_sizes.cbegin(), _coarser_sizes.cend() ), + steps( DIMS ) + { + for( size_t i = 0; i < DIMS; i++ ) { + // finer size MUST be an exact multiple of coarser_size + std::ldiv_t ratio = std::ldiv( _finer_sizes[ i ], _coarser_sizes[ i ] ); + if( ratio.quot < 2 || ratio.rem != 0 ) { + throw std::invalid_argument( + std::string( "finer size of dimension " ) + std::to_string( i ) + + std::string( "is not an exact multiple of coarser size" ) ); + } + steps[ i ] = ratio.quot; + } + _finer_subspace.retarget( steps ); } - steps[ i ] = ratio.quot; - } - _finer_subspace.retarget( steps ); - } - AverageCoarsenerBuilder( const SelfType & ) = delete; + AverageCoarsenerBuilder( const SelfType & ) = delete; - AverageCoarsenerBuilder( SelfType && ) = delete; + AverageCoarsenerBuilder( SelfType && ) = delete; - SelfType & operator=( const SelfType & ) = delete; + SelfType & operator=( const SelfType & ) = delete; - SelfType & operator=( SelfType && ) = delete; + SelfType & operator=( SelfType && ) = delete; - /** - * Returns the size of the finer system, i.e. its number of elements. - */ - size_t system_size() const { - return system.system_size(); - } + /** + * Returns the size of the finer system, i.e. its number of elements. + */ + size_t system_size() const { + return system.system_size(); + } - /** - * Produces a beginning iterator to generate the coarsening matrix. - */ - Iterator make_begin_iterator() { - return Iterator( system, _finer_subspace, steps ); - } + /** + * Produces a beginning iterator to generate the coarsening matrix. 
+ */ + Iterator make_begin_iterator() { + return Iterator( system, _finer_subspace, steps ); + } - /** - * Produces an end iteratormto stop the generation of the coarsening matrix. - */ - Iterator make_end_iterator() { - Iterator result( system, _finer_subspace, steps ); - result += ( system_size() * _finer_subspace.system_size() ); // do not trigger boundary checks - // ++result; - return result; - } - - private: - const grb::utils::multigrid::LinearizedNDimSystem< CoordType, - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > system; - grb::utils::multigrid::LinearizedNDimSystem< CoordType, - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > > _finer_subspace; - /// - /// array of steps, i.e. how much each column coordinate (finer system) must be - /// incremented when incrementing the row coordinates; it is the ratio between - //// #finer_sizes and row_generator#physical_sizes - grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps; - }; + /** + * Produces an end iterator to stop the generation of the coarsening + * matrix. + */ + Iterator make_end_iterator() { + Iterator result( system, _finer_subspace, steps ); + // do not trigger boundary checks + result += ( system_size() * _finer_subspace.system_size() ); + return result; + } + + + private: + + const grb::utils::multigrid::LinearizedNDimSystem< + CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > + > system; + + grb::utils::multigrid::LinearizedNDimSystem< + CoordType, + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > + > _finer_subspace; + + /** + * Array of steps, i.e. 
how much each column coordinate (finer system) must + * be incremented when incrementing the row coordinates; it is the ratio + * between #finer_sizes and row_generator#physical_sizes + */ + grb::utils::multigrid::ArrayVectorStorage< DIMS, CoordType > steps; + }; + + } // namespace internal } // namespace algorithms + } // namespace grb + #endif // _H_GRB_ALGORITHMS_AVERAGE_COARSENER + diff --git a/include/graphblas/algorithms/hpcg/system_building_utils.hpp b/include/graphblas/algorithms/hpcg/system_building_utils.hpp index 6ee46c7b3..37e6da311 100644 --- a/include/graphblas/algorithms/hpcg/system_building_utils.hpp +++ b/include/graphblas/algorithms/hpcg/system_building_utils.hpp @@ -243,7 +243,7 @@ namespace grb { CoarseningData< IOType, NonzeroType > & coarsener ) { return hpcg_populate_coarsener_any_builder< - grb::algorithms::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( + grb::algorithms::hpcg::AverageCoarsenerBuilder< DIMS, CoordType, NonzeroType > >( finer_system_generator, coarser_system_generator, coarsener ); }