diff --git a/CMakeLists.txt b/CMakeLists.txt
index 16408517d..adbc01d63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,7 @@ option (BUILD_SHARED_LIBS "Build shared libraries" ON)
 option (BUILD_TESTS "Build tests" ON)
 
 option (ENABLE_OPENMP "Use OpenMP for multithreading" OFF)
+option (ENABLE_HPX "Use HPX for multithreading" OFF)
 option (ENABLE_THREADS "Use pthread for multithreading" OFF)
 option (WITH_COMBINED_THREADS "Merge thread library" OFF)
 
@@ -122,6 +123,10 @@ if (Threads_FOUND)
   set (HAVE_THREADS TRUE)
 endif ()
 
+if(ENABLE_OPENMP AND ENABLE_HPX)  # the two parallel backends are mutually exclusive
+  message(FATAL_ERROR "OpenMP and HPX cannot be enabled at the same time")
+endif ()
+
 if (ENABLE_OPENMP)
   find_package (OpenMP)
 endif ()
@@ -129,6 +134,13 @@ if (OPENMP_FOUND)
   set (HAVE_OPENMP TRUE)
 endif ()
 
+if (ENABLE_HPX)
+  find_package (HPX)
+endif ()
+if (HPX_FOUND)
+  set (HAVE_HPX TRUE)
+endif ()
+
 include (CheckCCompilerFlag)
 
 if (ENABLE_SSE)
@@ -257,6 +269,7 @@ set(fftw_par_SOURCE
 
 set (fftw_threads_SOURCE ${fftw_par_SOURCE} threads/threads.c)
 set (fftw_omp_SOURCE ${fftw_par_SOURCE} threads/openmp.c)
+set (fftw_hpx_SOURCE ${fftw_par_SOURCE} threads/hpx.cpp)
 
 include_directories (.)
 
@@ -365,6 +378,16 @@ if (OPENMP_FOUND)
   target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS})
 endif ()
 
+if (HPX_FOUND)
+  add_library (${fftw3_lib}_hpx SHARED ${fftw_hpx_SOURCE})
+  target_include_directories (${fftw3_lib}_hpx INTERFACE $<INSTALL_INTERFACE:include>)
+  # One keyword-signature call: CMake rejects mixing keyword and plain
+  # target_link_libraries calls on the same target.  Linking HPX::hpx also
+  # supplies the HPX compile settings, so no target_compile_options is needed.
+  target_link_libraries (${fftw3_lib}_hpx PUBLIC HPX::hpx ${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT})
+  list (APPEND subtargets ${fftw3_lib}_hpx)
+endif ()
+
 foreach(subtarget ${subtargets})
   set_target_properties (${subtarget} PROPERTIES SOVERSION 3.6.9 VERSION 3)
   install (TARGETS ${subtarget}
diff --git a/configure.ac b/configure.ac
index b7fd7f1e9..6a9dcaf55 100644
--- a/configure.ac
+++ b/configure.ac
@@ -296,6 +296,7 @@ AC_LIBTOOL_WIN32_DLL
 AC_PROG_LIBTOOL
 AC_PROG_RANLIB
 AC_PROG_CPP
+AC_PROG_CXX
 
 AC_CHECK_PROG(OCAMLBUILD, ocamlbuild, ocamlbuild)
 
@@ -677,6 +678,62 @@ if test "$enable_openmp" = "yes"; then
    AX_OPENMP([], [AC_MSG_ERROR([don't know how to enable OpenMP])])
 fi
 
+AC_ARG_ENABLE(hpx, [AC_HELP_STRING([--enable-hpx],[use HPX for parallelism])], enable_hpx=$enableval, enable_hpx=no)
+
+if test "$enable_hpx" = "yes"; then
+   AC_SUBST(HPX_LIBS)
+   AC_SUBST(HPX_CFLAGS)
+
+   AC_CHECK_PROG(HAS_PKGCONFIG, pkg-config, yes, no, [], [])
+
+   if test "$HAS_PKGCONFIG" = "yes"; then
+      AC_SUBST(hpxcompfound)
+
+      PKG_CHECK_EXISTS([hpx_component],[hpxcompfound=yes],[hpxcompfound=no])
+
+      if test "$hpxcompfound" = "no"; then
+         PKG_CHECK_EXISTS([hpx_component_relwithdebinfo],[],[
+            AC_MSG_ERROR([required library HPX not found, check PKG_CONFIG_PATH])
+         ])
+         PKG_CHECK_MODULES([HPX], [hpx_component_relwithdebinfo])
+      else
+         PKG_CHECK_MODULES([HPX], [hpx_component])
+      fi
+   else
+      dnl No pkg-config: probe for the HPX header and library directly.
+      AC_LANG_PUSH([C++])
+
+      AC_SUBST(HAVE_HPX_HPP)
+      AC_CHECK_HEADER(hpx/hpx.hpp,[HAVE_HPX_HPP=1],[HAVE_HPX_HPP=0])
+      if test "$HAVE_HPX_HPP" = "0"; then
+         AC_MSG_ERROR([required HPX not found, check CXXFLAGS])
+      fi
+
+      dnl The link probe needs the HPX libraries on the link line.
+      HPX_LIBS="-lhpx -lhpx_core"
+      hpx_save_LIBS="$LIBS"
+      LIBS="$HPX_LIBS $LIBS"
+      AC_SUBST(HAVE_HPX)
+      AC_LINK_IFELSE(
+         [AC_LANG_PROGRAM([#include <hpx/hpx_init.hpp>],
+                          [hpx::init(); hpx::finalize();])],
+         [HAVE_HPX=1],
+         [HAVE_HPX=0])
+      LIBS="$hpx_save_LIBS"
+
+      if test "$HAVE_HPX" = "0"; then
+         AC_MSG_ERROR([required library HPX not found, check LD_LIBRARY_PATH])
+      fi
+
+      AC_LANG_POP([C++])
+   fi
+
+   dnl Both detection paths must define HAVE_HPX (threads/hpx.cpp refuses to
+   dnl compile without it) and export HPX_CXXFLAGS.
+   AC_SUBST(HPX_CXXFLAGS, ["$HPX_CFLAGS"])
+   AC_DEFINE(HAVE_HPX,1,[Define to enable HPX])
+fi
+
 AC_ARG_ENABLE(threads, [AC_HELP_STRING([--enable-threads],[compile FFTW SMP threads library])], enable_threads=$enableval, enable_threads=no)
 
 if test "$enable_threads" = "yes"; then
@@ -721,6 +778,7 @@ fi
 AC_SUBST(THREADLIBS)
 AM_CONDITIONAL(THREADS, test "$enable_threads" = "yes")
 AM_CONDITIONAL(OPENMP, test "$enable_openmp" = "yes")
+AM_CONDITIONAL(HPX, test "$enable_hpx" = "yes")
 AM_CONDITIONAL(SMP, test "$enable_threads" = "yes" -o "$enable_openmp" = "yes")
 AM_CONDITIONAL(COMBINED_THREADS, test x"$with_combined_threads" = xyes)
 
diff --git a/kernel/cycle.h b/kernel/cycle.h
index 16dfdc98f..59ba9517d 100644
--- a/kernel/cycle.h
+++ b/kernel/cycle.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2003, 2007-14 Matteo Frigo
  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ * Copyright (c) 2024 Christopher Taylor, Tactical Computing Labs, LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining
  * a copy of this software and associated documentation files (the
@@ -562,3 +563,28 @@ static inline ticks getticks(void)
 INLINE_ELAPSED(inline)
 #define HAVE_TICK_COUNTER
 #endif
+
+/*----------------------------------------------------------------*/
+/*
+ * RISC-V 64-bit cycle counter (RV64G)
+ * NOTE(review): confirm rdcycle is readable from user mode on the
+ * target OS; otherwise this instruction traps.
+ */
+#if defined(__riscv) && (__riscv_xlen == 64) && !defined(HAVE_TICK_COUNTER)
+
+typedef unsigned long ticks;
+
+static __inline__ ticks getticks(void)
+{
+  unsigned long cycles;
+
+  /* no input, nothing else clobbered */
+  __asm__ __volatile__ ("rdcycle %0" : "=r" (cycles));
+
+  return cycles;
+}
+
+INLINE_ELAPSED(__inline__)
+#define HAVE_TICK_COUNTER
+
+#endif
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index 8e43fac5b..2a5ddd255 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -32,11 +32,16 @@ mpi_bench_CFLAGS = $(PTHREAD_CFLAGS)
 if !COMBINED_THREADS
 LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_threads.la
 endif
 else
 if OPENMP
 mpi_bench_CFLAGS = $(OPENMP_CFLAGS)
 LIBFFTWTHREADS = $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_omp.la
+else
+if HPX
+mpi_bench_CFLAGS = $(HPX_CXXFLAGS)
+LIBFFTWTHREADS = $(HPX_LIBS) $(top_builddir)/threads/libfftw3@PREC_SUFFIX@_hpx.la
+endif
 endif
 endif
 
 mpi_bench_SOURCES = mpi-bench.c $(top_srcdir)/tests/fftw-bench.c $(top_srcdir)/tests/hook.c
diff --git a/threads/Makefile.am b/threads/Makefile.am
index f5c170a9a..a7497c169 100644
--- a/threads/Makefile.am
+++ b/threads/Makefile.am
@@ -7,14 +7,20 @@ else
 FFTWOMPLIB =
 endif
 
+if HPX
+FFTWHPXLIB = libfftw3@PREC_SUFFIX@_hpx.la
+else
+FFTWHPXLIB =
+endif
+
 if THREADS
 if COMBINED_THREADS
 noinst_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la
 else
-lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWOMPLIB)
+lib_LTLIBRARIES = libfftw3@PREC_SUFFIX@_threads.la $(FFTWOMPLIB) $(FFTWHPXLIB)
 endif
 else
-lib_LTLIBRARIES = $(FFTWOMPLIB)
+lib_LTLIBRARIES = $(FFTWOMPLIB) $(FFTWHPXLIB)
 endif
 
 libfftw3@PREC_SUFFIX@_threads_la_SOURCES = api.c conf.c threads.c \
@@ -26,6 +32,7 @@ if !COMBINED_THREADS
 libfftw3@PREC_SUFFIX@_threads_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la
 endif
 
+if OPENMP
 libfftw3@PREC_SUFFIX@_omp_la_SOURCES = api.c conf.c openmp.c \
 threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \
 vrank-geq1-rdft2.c f77api.c f77funcs.h
@@ -34,3 +41,16 @@ libfftw3@PREC_SUFFIX@_omp_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@
 if !COMBINED_THREADS
 libfftw3@PREC_SUFFIX@_omp_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la
 endif
+endif
+
+if HPX
+libfftw3@PREC_SUFFIX@_hpx_la_SOURCES = api.c conf.c hpx.cpp \
+threads.h dft-vrank-geq1.c ct.c rdft-vrank-geq1.c hc2hc.c \
+vrank-geq1-rdft2.c f77api.c f77funcs.h
+libfftw3@PREC_SUFFIX@_hpx_la_CXXFLAGS = $(HPX_CXXFLAGS)
+libfftw3@PREC_SUFFIX@_hpx_la_CFLAGS = $(AM_CFLAGS)
+libfftw3@PREC_SUFFIX@_hpx_la_LDFLAGS = -version-info @SHARED_VERSION_INFO@
+if !COMBINED_THREADS
+libfftw3@PREC_SUFFIX@_hpx_la_LIBADD = ../libfftw3@PREC_SUFFIX@.la
+endif
+endif
diff --git a/threads/hpx.cpp b/threads/hpx.cpp
new file mode 100644
index 000000000..439d635c8
--- /dev/null
+++ b/threads/hpx.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2003, 2007-14 Matteo Frigo
+ * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
+ * Copyright (c) 2023 Tactical Computing Labs, LLC (Christopher Taylor)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+/* hpx.cpp: thread spawning via HPX */
+
+#include "threads/threads.h"
+#include "api/api.h"
+
+#if !defined(HAVE_HPX)
+#error HPX enabled but not using HPX
+#endif
+
+#include <cstdlib>
+#include <mutex>
+#include <string>
+#include <vector>
+
+/* NOTE(review): confirm this HPX header list against the HPX release
+   being targeted. */
+#include <hpx/future.hpp>
+#include <hpx/include/run_as.hpp>
+#include <hpx/init.hpp>
+#include <hpx/mutex.hpp>
+#include <hpx/semaphore.hpp>
+
+/* Describe block number tid of the loop: iterations tid * block_size up
+   to (but not including) the smaller of (tid + 1) * block_size and
+   loopmax. */
+static void init_block(spawn_data *sd, int tid, int block_size,
+                       int loopmax, void *data)
+{
+   sd->min = tid * block_size;
+   sd->max = sd->min + block_size;
+   if (sd->max > loopmax)
+      sd->max = loopmax;
+   sd->thr_num = tid;
+   sd->data = data;
+}
+
+extern "C" {
+
+/* Start the HPX runtime.  If the environment variable
+   FFTW3_HPX_NTHREADS is set, its value is handed to HPX as a
+   "--hpx:threads=" configuration entry.
+
+   Returns 0 on success and nonzero on failure, which is what the
+   caller in threads/api.c expects; hpx::start() reports success as
+   true, so its result is inverted here. */
+int X(ithreads_init)(void)
+{
+   const char *nthreads_cstr = std::getenv("FFTW3_HPX_NTHREADS");
+   hpx::init_params params;
+
+   if (nthreads_cstr != nullptr) {
+      /* NOTE(review): confirm that init_params::cfg accepts this
+         command-line style spelling. */
+      params.cfg = { "--hpx:threads=" + std::string(nthreads_cstr) };
+   }
+
+   return hpx::start(nullptr, 0, nullptr, params) ? 0 : 1;
+}
+
+/* Distribute a loop from 0 to loopmax-1 over nthreads threads.
+   proc(d) is called to execute a block of iterations from d->min
+   to d->max-1.  d->thr_num indicate the number of the thread
+   that is executing proc (from 0 to nthreads-1), and d->data is
+   the same as the data parameter passed to X(spawn_loop).
+
+   This function returns only after all the threads have completed. */
+void X(spawn_loop)(int loopmax, int nthr, spawn_function proc, void *data)
+{
+   int block_size;
+   int i;
+
+   A(loopmax >= 0);
+   A(nthr > 0);
+   A(proc);
+
+   if (!loopmax) return;
+
+   /* Choose the block size and number of threads in order to (1)
+      minimize the critical path and (2) use the fewest threads that
+      achieve the same critical path (to minimize overhead).
+      e.g. if loopmax is 5 and nthr is 4, we should use only 3
+      threads with block sizes of 2, 2, and 1. */
+   block_size = (loopmax + nthr - 1) / nthr;
+   nthr = (loopmax + block_size - 1) / block_size;
+
+   if (X(spawnloop_callback)) { /* user-defined spawnloop backend */
+      spawn_data *sdata;
+      STACK_MALLOC(spawn_data *, sdata, sizeof(spawn_data) * nthr);
+      for (i = 0; i < nthr; ++i)
+         init_block(&sdata[i], i, block_size, loopmax, data);
+      X(spawnloop_callback)(proc, sdata, sizeof(spawn_data), nthr, X(spawnloop_callback_data));
+      STACK_FREE(sdata);
+      return;
+   }
+
+   /* Fill in every descriptor before any task starts, so the tasks
+      only read from sdata. */
+   std::vector<spawn_data> sdata(nthr);
+   for (i = 0; i < nthr; ++i)
+      init_block(&sdata[i], i, block_size, loopmax, data);
+
+   std::vector<hpx::future<void>> futures;
+   futures.reserve(nthr - 1);
+
+   /* Blocks 1..nthr-1 run as separate HPX tasks; block 0 runs in the
+      HPX thread that launched them.  The reference captures are safe:
+      fut is the lambda's own return value, so the lambda has finished
+      by the time this function can return. */
+   hpx::future<void> fut =
+      hpx::threads::run_as_hpx_thread([nthr, proc, &futures, &sdata]() -> hpx::future<void>
+      {
+         for (int tid = 1; tid < nthr; ++tid)
+         {
+            spawn_data *sd = &sdata[tid];
+            futures.push_back(hpx::async([proc, sd]() { proc(sd); }));
+         }
+
+         proc(&sdata[0]);
+
+         hpx::wait_all(futures);
+         return hpx::make_ready_future();
+      });
+
+   fut.wait();
+}
+
+/* Schedule hpx::finalize() as an HPX task, then call hpx::stop(). */
+void X(threads_cleanup)(void)
+{
+   hpx::post([]() { hpx::finalize(); });
+   hpx::stop();
+}
+
+/* planner_semaphore (initial count 1) backs the planner lock/unlock
+   hooks; install_planner_hooks_mutex guards planner_hooks_installed. */
+static hpx::counting_semaphore<> planner_semaphore(1);
+static hpx::mutex install_planner_hooks_mutex;
+static bool planner_hooks_installed = false;
+
+static void lock_planner_mutex(void)
+{
+   planner_semaphore.acquire();
+}
+
+static void unlock_planner_mutex(void)
+{
+   planner_semaphore.release();
+}
+
+/* Install the planner lock/unlock hooks, at most once. */
+void X(threads_register_planner_hooks)(void)
+{
+   std::lock_guard<hpx::mutex> lkg(install_planner_hooks_mutex);
+   if (!planner_hooks_installed) {
+      X(set_planner_hooks)(lock_planner_mutex, unlock_planner_mutex);
+      planner_hooks_installed = true;
+   }
+}
+
+} /* extern "C" */
diff --git a/threads/threads.h b/threads/threads.h
index e48db3fbc..738a22d3d 100644
--- a/threads/threads.h
+++ b/threads/threads.h
@@ -32,10 +32,21 @@ typedef struct {
 
 typedef void *(*spawn_function) (spawn_data *);
 
+/* The HPX backend (hpx.cpp) is compiled as C++ and defines these entry
+   points, so C++ translation units must see them with C linkage. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 void X(spawn_loop)(int loopmax, int nthreads,
 		   spawn_function proc, void *data);
 int X(ithreads_init)(void);
 void X(threads_cleanup)(void);
+void X(threads_register_planner_hooks)(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
 
 typedef void (*spawnloop_function)(spawn_function, spawn_data *, size_t, int, void *);
 extern spawnloop_function X(spawnloop_callback);
@@ -55,6 +66,5 @@ hc2hc_solver *X(mksolver_hc2hc_threads)(size_t size, INT r, hc2hc_mkinferior mkc
 void X(threads_conf_standard)(planner *p);
 void X(threads_register_hooks)(void);
 void X(threads_unregister_hooks)(void);
-void X(threads_register_planner_hooks)(void);
 
 #endif /* __THREADS_H__ */