From beaf4452c7afd63560fe6252c5092096887f8331 Mon Sep 17 00:00:00 2001 From: Benjamin Lozes Date: Tue, 16 Jan 2024 15:56:15 +0100 Subject: [PATCH 1/7] Add an assertion for changing num_threads along execution --- include/graphblas/reference/coordinates.hpp | 11 +++++++++++ include/graphblas/reference/vector.hpp | 3 +++ 2 files changed, 14 insertions(+) diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index 2a1f90137..2ef6e8a4b 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -104,6 +104,13 @@ namespace grb { /** Per-thread capacity for parallel stack updates. */ size_t _buf; + /** Number of threads for which these coordinates have been initialised. */ +#ifdef _H_GRB_REFERENCE_OMP_COORDINATES + const size_t _threads = config::OMP::threads(); +#else + const size_t _threads = 1; +#endif + /** * Increments the number of nonzeroes in the current thread-local stack. * @@ -440,6 +447,10 @@ namespace grb { // blocks are not managed by this class) } + size_t requiredThreadsForUpdate() const noexcept { + return _threads; + } + /** * @returns An empty thread-local stack for new nonzeroes. */ diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index 4be006ed5..a72bc14f5 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -1308,6 +1308,9 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { +#ifdef _H_GRB_REFERENCE_OMP_VECTOR + assert( x._coordinates.requiredThreadsForUpdate() == config::OMP::threads() ); +#endif return x._coordinates; } From a35f380fed2c75599b5e32491c8b7c2ad64c7225 Mon Sep 17 00:00:00 2001 From: Benjamin Lozes Date: Tue, 16 Jan 2024 17:12:06 +0100 Subject: [PATCH 2/7] Fix the assertion to be callable from parallel context --- include/graphblas/omp/config.hpp | 9 +++++++++ include/graphblas/reference/vector.hpp | 7 ++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/graphblas/omp/config.hpp b/include/graphblas/omp/config.hpp index e4a108319..274624bf9 100644 --- a/include/graphblas/omp/config.hpp +++ b/include/graphblas/omp/config.hpp @@ -90,6 +90,15 @@ namespace grb { return ret; } + /** + * @returns The maximum number of threads reported by OpenMP. + * + * This function can be called from any context. + */ + static size_t maxThreads() { + return omp_get_max_threads(); + } + /** * @returns The number of threads in the current OpenMP parallel section. * diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index a72bc14f5..704d42bf6 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -1309,7 +1309,12 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { #ifdef _H_GRB_REFERENCE_OMP_VECTOR - assert( x._coordinates.requiredThreadsForUpdate() == config::OMP::threads() ); + if( x._coordinates.requiredThreadsForUpdate() != config::OMP::maxThreads() ) { + #pragma omp critical + std::cerr << " " << x._coordinates.requiredThreadsForUpdate() + << " != " << config::OMP::maxThreads() << "\n"; + } + assert( x._coordinates.requiredThreadsForUpdate() == config::OMP::maxThreads() ); #endif return x._coordinates; } From 11b57b6accc37efafefe0447be33b2b278c9df05 Mon Sep 17 00:00:00 2001 From: Benjamin Lozes Date: Mon, 26 Feb 2024 15:01:35 +0100 Subject: [PATCH 3/7] Review fix --- include/graphblas/reference/vector.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index 704d42bf6..7fb42fa85 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -1308,7 +1308,7 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { -#ifdef _H_GRB_REFERENCE_OMP_VECTOR +#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) if( x._coordinates.requiredThreadsForUpdate() != config::OMP::maxThreads() ) { #pragma omp critical std::cerr << " " << x._coordinates.requiredThreadsForUpdate() From 5f35db938b3008bf0fca987bb8c61e72df7a95ff Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Tue, 13 Jan 2026 17:36:34 +0100 Subject: [PATCH 4/7] Reviewed the MR --- include/graphblas/omp/config.hpp | 9 -- include/graphblas/reference/coordinates.hpp | 124 +++++++++++++++++--- include/graphblas/reference/vector.hpp | 12 +- 3 files changed, 112 insertions(+), 33 deletions(-) diff --git a/include/graphblas/omp/config.hpp b/include/graphblas/omp/config.hpp index 274624bf9..e4a108319 100644 --- a/include/graphblas/omp/config.hpp +++ b/include/graphblas/omp/config.hpp @@ -90,15 +90,6 @@ namespace grb { return ret; } - /** - * @returns The maximum number of threads reported by OpenMP. - * - * This function can be called from any context. - */ - static size_t maxThreads() { - return omp_get_max_threads(); - } - /** * @returns The number of threads in the current OpenMP parallel section. * diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index 2ef6e8a4b..5d3d05aca 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -59,9 +59,32 @@ namespace grb { /** * This class encapsulates everything needed to store a sparse set of 1D - * coordinates. Its use is internal via, e.g., grb::Vector< T, reference, C >. - * All functions needed to rebuild or update sparsity information are - * encapsulated here. + * coordinates. + * + * Its use is internal via, e.g., grb::Vector< T, reference, C >. All + * functions needed to rebuild or update sparsity information are encapsulated + * here. + * + * An instance of this class should always be initialised using one of the + * following functions: + * -# set + * -# set_seq + * -# set_ompPar + * -# setDense + * The use of both the default constructor and that of setDense does not lead + * to a fully initialised instance and may only be used in restricted use + * cases. + * + * The coordinates are in essense traditional sparse accumulators. There are + * three main arrays in which sparsity information is stored: + * -# _assigned + * -# _stack + * -# _buffer + * + * In the case of shared-memory parallel backends, the size of the buffer + * depends on the number of threads the instance needs to support. (Or + * rather, this is the case for all presently-supported shared-memory parallel + * backends). */ template<> class Coordinates< reference > { @@ -105,11 +128,7 @@ namespace grb { size_t _buf; /** Number of threads for which these coordinates have been initialised. */ -#ifdef _H_GRB_REFERENCE_OMP_COORDINATES - const size_t _threads = config::OMP::threads(); -#else - const size_t _threads = 1; -#endif + size_t _threads; /** * Increments the number of nonzeroes in the current thread-local stack. @@ -170,6 +189,7 @@ namespace grb { _n = 0; _cap = 0; _buf = 0; + _threads = 0; return; } @@ -189,6 +209,11 @@ namespace grb { // initialise _n = 0; _cap = dim; +#ifdef _H_GRB_REFERENCE_OMP_COORDINATES + _threads = config::OMP::threads(); +#else + _threads = 1; +#endif } /** @@ -377,7 +402,7 @@ namespace grb { /** Base constructor. Creates an empty coordinates list of dimension 0. */ inline Coordinates() noexcept : _assigned( nullptr ), _stack( nullptr ), _buffer( nullptr ), - _n( 0 ), _cap( 0 ), _buf( 0 ) + _n( 0 ), _cap( 0 ), _buf( 0 ), _threads( 0 ) {} /** @@ -386,12 +411,12 @@ namespace grb { */ inline Coordinates( Coordinates &&x ) noexcept : _assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ), - _n( x._n ), _cap( x._cap ), _buf( x._buf ) + _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads ) { x._assigned = nullptr; x._stack = nullptr; x._buffer = nullptr; - x._n = x._cap = x._buf = 0; + x._n = x._cap = x._buf = x._threads = 0; } /** @@ -403,7 +428,7 @@ namespace grb { */ inline Coordinates( const Coordinates &x ) noexcept : _assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ), - _n( x._n ), _cap( x._cap ), _buf( x._buf ) + _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads ) { // self-assignment is a programming error assert( this != &x ); @@ -433,9 +458,10 @@ namespace grb { _n = x._n; _cap = x._cap; _buf = x._buf; + _threads = x._threads; x._assigned = NULL; x._stack = x._buffer = NULL; - x._n = x._cap = x._buf = 0; + x._n = x._cap = x._buf = x._threads = 0; return *this; } @@ -447,8 +473,68 @@ namespace grb { // blocks are not managed by this class) } - size_t requiredThreadsForUpdate() const noexcept { - return _threads; + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant should be called from a sequential context. + * + * This is intended exclusively for use within a debug mode, as the test + * has the significant overhead of opening up an OpenMP parallel section. + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreadsSeq() const noexcept { + const size_t actualThreads = config::OMP::threads(); + if( actualThreads != _threads ) { + std::cerr << "\t Error: coordinates instance was set for " << _threads + << " threads, however, current OpenMP parallel region reports " + << actualThreads << " threads instead!\n"; +#ifndef NDEBUG + const bool num_omp_threads_has_changed = false; + assert( num_omp_threads_has_changed ); +#endif + return false; + } + return true; + } + + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant should be called from a sequential context. + * + * This is intended for use within a debug mode, but could conceivably be + * used defensively in performance mode as well (there is no significant + * performance overhead for this variant). + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreadsPar() const noexcept { + const size_t actualThreads = + static_cast< size_t >( omp_get_num_threads() ); + if( actualThreads != _threads ) { + std::cerr << "\t Error: coordinates instance was set for " << _threads + << " threads, however, current OpenMP parallel region reports " + << actualThreads << " threads instead!\n"; +#ifndef NDEBUG + const bool num_omp_threads_has_changed = false; + assert( num_omp_threads_has_changed ); +#endif + return false; + } + return true; } /** @@ -468,8 +554,9 @@ namespace grb { } /** - * Sets the data structure. A call to this function sets the number of - * coordinates to zero. + * Sets the data structure. + * + * A call to this function sets the number of coordinates to zero. * * @param[in] arr Pointer to an array of size #arraySize. This array is * is managed by a container outside this class (and thus @@ -481,6 +568,8 @@ namespace grb { * managed by a container outside this class (and thus will * not be freed on destruction of this instance). * @param[in] dim Size (dimension) of this vector, in number of elements. + * @param[in] threads The number of threads that \a buf has been constructed + * for. * * The memory area \a raw will be reset to reflect an empty coordinate set. * @@ -576,6 +665,7 @@ namespace grb { _n = dim; _cap = dim; _buf = 0; + _threads = 0; } /** diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index 7fb42fa85..ee691121a 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -1308,13 +1308,8 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { -#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) - if( x._coordinates.requiredThreadsForUpdate() != config::OMP::maxThreads() ) { - #pragma omp critical - std::cerr << " " << x._coordinates.requiredThreadsForUpdate() - << " != " << config::OMP::maxThreads() << "\n"; - } - assert( x._coordinates.requiredThreadsForUpdate() == config::OMP::maxThreads() ); +#ifdef _H_GRB_REFERENCE_OMP_VECTOR + (void) x._coordinates.checkNumThreadsSeq(); #endif return x._coordinates; } @@ -1323,6 +1318,9 @@ namespace grb { inline const C & getCoordinates( const Vector< D, reference, C > &x ) noexcept { +#ifdef _H_GRB_REFERENCE_OMP_VECTOR + (void) x._coordinates.checkNumThreadsSeq(); +#endif return x._coordinates; } From 526413dee7f8267406fd47de6e4dd329bcbf06e2 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Tue, 13 Jan 2026 17:57:30 +0100 Subject: [PATCH 5/7] That did not really work -- the internal getters are called both from sequential and parallel contexts. Refined it a bit and trying again --- include/graphblas/reference/coordinates.hpp | 25 +++++++++++++++++++++ include/graphblas/reference/vector.hpp | 8 +++---- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index 5d3d05aca..b68bfda7b 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -473,6 +473,31 @@ namespace grb { // blocks are not managed by this class) } + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant automatically determines if it is in a sequential or + * (OpenMP) parallel context. + * + * This is intended exclusively for use within a debug mode, as the test + * has the significant overhead of opening up an OpenMP parallel section. + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreads() const noexcept { + if( omp_in_parallel() ) { + return checkNumThreadsPar(); + } else { + return checkNumThreadsSeq(); + } + } + /** * Checks whether a new OpenMP parallel section returns the same number of * threads that was given during initialisation of this instance. diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index ee691121a..20a015649 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -1308,8 +1308,8 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { -#ifdef _H_GRB_REFERENCE_OMP_VECTOR - (void) x._coordinates.checkNumThreadsSeq(); +#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) + (void) x._coordinates.checkNumThreads(); #endif return x._coordinates; } @@ -1318,8 +1318,8 @@ namespace grb { inline const C & getCoordinates( const Vector< D, reference, C > &x ) noexcept { -#ifdef _H_GRB_REFERENCE_OMP_VECTOR - (void) x._coordinates.checkNumThreadsSeq(); +#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) + (void) x._coordinates.checkNumThreads(); #endif return x._coordinates; } From 1561878be5b0a94e0af6f8b668f31880a543c106 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 14 Jan 2026 12:22:19 +0100 Subject: [PATCH 6/7] Unit tests caught that sometimes uninitialised coordinate instances are retrieved (and the instance is then inspected to check whether it is valid or not-- which is valid use). The new thread assertion check mechanism now copes with that --- include/graphblas/reference/coordinates.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index b68bfda7b..d808fc631 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -515,6 +515,11 @@ namespace grb { * however. */ bool checkNumThreadsSeq() const noexcept { + // guard against (valid) use of non-initialised coordinates + if( _threads == 0 ) { + return true; + } + // then, get actual number of threads now active and compare const size_t actualThreads = config::OMP::threads(); if( actualThreads != _threads ) { std::cerr << "\t Error: coordinates instance was set for " << _threads @@ -547,6 +552,11 @@ namespace grb { * however. */ bool checkNumThreadsPar() const noexcept { + // guard against (valid) use of non-initialised coordinates + if( _threads == 0 ) { + return true; + } + // then, get actual number of threads now active and compare const size_t actualThreads = static_cast< size_t >( omp_get_num_threads() ); if( actualThreads != _threads ) { From 52bbf2498f776b09cf2ef5a980898dc1b0901294 Mon Sep 17 00:00:00 2001 From: "Albert-Jan N. Yzelman" Date: Wed, 14 Jan 2026 13:20:15 +0100 Subject: [PATCH 7/7] Unit tests were taking a bit long and indeed the performance overhead of the new check caused an issue for any test using debug-mode iterators over large vectors. This commit hence includes a performance fix (which should also benefit the performance of vector extraction in performance mode). --- include/graphblas/reference/vector.hpp | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index 20a015649..caa58aa4a 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -653,6 +653,9 @@ namespace grb { /** The maximum value of #position. */ size_t max; + /** The size of the vector (not necessarily equal to \a max). */ + size_t n; + /** The local process ID. */ const size_t s; @@ -689,10 +692,14 @@ namespace grb { // if not, go to the next valid value: if( container->_coordinates.isEmpty() ) { max = 0; - } else if( container->_coordinates.isDense() ) { - max = container->_coordinates.size(); + n = 0; } else { - max = container->_coordinates.nonzeroes(); + n = container->_coordinates.size(); + if( container->_coordinates.isDense() ) { + max = n; + } else { + max = container->_coordinates.nonzeroes(); + } } if( position < max ) { setValue(); @@ -712,7 +719,7 @@ namespace grb { * \note If the \a other iterator is not derived from the same container * as this iterator, the result is undefined. */ - bool equal( const ConstIterator & other ) const noexcept { + bool equal( const ConstIterator &other ) const noexcept { return other.position == position; } @@ -729,7 +736,7 @@ namespace grb { } assert( container->_coordinates.assigned( index ) ); const size_t global_index = ActiveDistribution::local_index_to_global( - index, size( *container ), s, P ); + index, n, s, P ); #ifdef _DEBUG std::cout << "\t ConstIterator at process " << s << " / " << P << " translated index " << index << " to " << global_index << "\n"; @@ -742,7 +749,7 @@ namespace grb { /** Default constructor. */ ConstIterator() noexcept : - container( nullptr ), position( 0 ), max( 0 ), + container( nullptr ), position( 0 ), max( 0 ), n( 0 ), s( grb::spmd< spmd_backend >::pid() ), P( grb::spmd< spmd_backend >::nprocs() ) {} @@ -751,7 +758,7 @@ namespace grb { ConstIterator( const ConstIterator &other ) noexcept : container( other.container ), value( other.value ), position( other.position ), - max( other.max ), + max( other.max ), n( other.n ), s( other.s ), P( other.P ) {} @@ -762,6 +769,7 @@ namespace grb { std::swap( value, other.value ); std::swap( position, other.position ); std::swap( max, other.max ); + std::swap( n, other.n ); } /** Copy assignment. */ @@ -770,6 +778,7 @@ namespace grb { value = other.value; position = other.position; max = other.max; + n = other.n; assert( s == other.s ); assert( P == other.P ); return *this; @@ -781,6 +790,7 @@ namespace grb { std::swap( value, other.value ); std::swap( position, other.position ); std::swap( max, other.max ); + std::swap( n, other.n ); assert( s == other.s ); assert( P == other.P ); return *this;