From 31e3451085ac10d710f50503b1fb40605baff1b7 Mon Sep 17 00:00:00 2001 From: Mario Emmenlauer Date: Wed, 9 Mar 2022 20:37:52 +0100 Subject: [PATCH 1/3] CMakeLists.txt: Updated version to 3.3.10 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 298f830af..db20caa7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -279,7 +279,7 @@ if (HAVE_AVX2) list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE}) endif () -set (FFTW_VERSION 3.3.9) +set (FFTW_VERSION 3.3.10) set (PREC_SUFFIX) if (ENABLE_FLOAT) From 632233e0c98b51c13e816b3d162b9dd5c96d61f8 Mon Sep 17 00:00:00 2001 From: Mario Emmenlauer Date: Wed, 9 Mar 2022 20:31:56 +0100 Subject: [PATCH 2/3] CMake: Added ARM Neon support --- CMakeLists.txt | 48 ++++++++++++++++++++++++++++++----------------- cmake.config.h.in | 2 +- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db20caa7b..48e724f37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ option (ENABLE_SSE "Compile with SSE instruction set support" OFF) option (ENABLE_SSE2 "Compile with SSE2 instruction set support" OFF) option (ENABLE_AVX "Compile with AVX instruction set support" OFF) option (ENABLE_AVX2 "Compile with AVX2 instruction set support" OFF) +option (ENABLE_NEON "Compile with NEON instruction set support" OFF) option (DISABLE_FORTRAN "Disable Fortran wrapper routines" OFF) @@ -192,9 +193,20 @@ if (ENABLE_AVX2) endforeach () endif () +if (ENABLE_NEON) + if (ENABLE_LONG_DOUBLE) + message (FATAL_ERROR "NEON only works in single or double precision, please disable long double support") + endif () + if (ENABLE_QUAD_PRECISION) + message (FATAL_ERROR "NEON only works in single or double precision, please disable quad precision support") + endif () + set (HAVE_NEON TRUE) +endif () + if (HAVE_SSE2 OR HAVE_AVX) set (HAVE_SIMD TRUE) endif () + file(GLOB fftw_api_SOURCE api/*.c api/*.h) file(GLOB fftw_dft_SOURCE dft/*.c dft/*.h) file(GLOB fftw_dft_scalar_SOURCE dft/scalar/*.c dft/scalar/*.h) @@ -204,6 +216,7 @@ file(GLOB fftw_dft_simd_SOURCE dft/simd/*.c dft/simd file(GLOB fftw_dft_simd_sse2_SOURCE dft/simd/sse2/*.c dft/simd/sse2/*.h) file(GLOB fftw_dft_simd_avx_SOURCE dft/simd/avx/*.c dft/simd/avx/*.h) file(GLOB fftw_dft_simd_avx2_SOURCE dft/simd/avx2/*.c dft/simd/avx2/*.h dft/simd/avx2-128/*.c dft/simd/avx2-128/*.h) +file(GLOB fftw_dft_simd_neon_SOURCE dft/simd/neon/*.c dft/simd/neon/*.h) file(GLOB fftw_kernel_SOURCE kernel/*.c kernel/*.h) file(GLOB fftw_rdft_SOURCE rdft/*.c rdft/*.h) file(GLOB fftw_rdft_scalar_SOURCE rdft/scalar/*.c rdft/scalar/*.h) @@ -219,6 +232,7 @@ file(GLOB fftw_rdft_simd_SOURCE rdft/simd/*.c rdft/sim file(GLOB fftw_rdft_simd_sse2_SOURCE rdft/simd/sse2/*.c rdft/simd/sse2/*.h) file(GLOB fftw_rdft_simd_avx_SOURCE rdft/simd/avx/*.c rdft/simd/avx/*.h) file(GLOB fftw_rdft_simd_avx2_SOURCE rdft/simd/avx2/*.c rdft/simd/avx2/*.h rdft/simd/avx2-128/*.c rdft/simd/avx2-128/*.h) +file(GLOB fftw_rdft_simd_neon_SOURCE rdft/simd/neon/*.c rdft/simd/neon/*.h) file(GLOB fftw_reodft_SOURCE reodft/*.c reodft/*.h) file(GLOB fftw_simd_support_SOURCE simd-support/*.c simd-support/*.h) @@ -279,6 +293,10 @@ if (HAVE_AVX2) list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE}) endif () +if (HAVE_NEON) + list (APPEND SOURCEFILES ${fftw_dft_simd_neon_SOURCE} ${fftw_rdft_simd_neon_SOURCE}) +endif () + set (FFTW_VERSION 3.3.10) set (PREC_SUFFIX) @@ -358,9 +376,9 @@ endif () foreach(subtarget ${subtargets}) set_target_properties (${subtarget} PROPERTIES SOVERSION 3.6.9 VERSION 3) install (TARGETS ${subtarget} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) endforeach () install(TARGETS ${fftw3_lib} EXPORT FFTW3LibraryDepends @@ -381,7 +399,6 @@ if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f03.in) endif () if (BUILD_TESTS) - add_executable (bench ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c) if (ENABLE_THREADS AND NOT WITH_COMBINED_THREADS) @@ -390,18 +407,15 @@ if (BUILD_TESTS) target_link_libraries (bench ${fftw3_lib}) endif () - enable_testing () if (Threads_FOUND) - macro (fftw_add_test problem) add_test (NAME ${problem} COMMAND bench -s ${problem}) endmacro () fftw_add_test (32x64) fftw_add_test (ib256) - endif () endif () @@ -413,22 +427,22 @@ set (includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR}) set (VERSION ${FFTW_VERSION}) configure_file (fftw.pc.in fftw3${PREC_SUFFIX}.pc @ONLY) install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/fftw3${PREC_SUFFIX}.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig - COMPONENT Development) + ${CMAKE_CURRENT_BINARY_DIR}/fftw3${PREC_SUFFIX}.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig + COMPONENT Development) # cmake file set (FFTW3_LIBRARIES "FFTW3::${fftw3_lib}") configure_file (FFTW3Config.cmake.in FFTW3${PREC_SUFFIX}Config.cmake @ONLY) configure_file (FFTW3ConfigVersion.cmake.in FFTW3${PREC_SUFFIX}ConfigVersion.cmake @ONLY) install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) + ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} + COMPONENT Development) export (TARGETS ${fftw3_lib} NAMESPACE FFTW3:: FILE ${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake) install(EXPORT FFTW3LibraryDepends - NAMESPACE FFTW3:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) + NAMESPACE FFTW3:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} + COMPONENT Development) diff --git a/cmake.config.h.in b/cmake.config.h.in index 1f4c50559..8c61b38fb 100644 --- a/cmake.config.h.in +++ b/cmake.config.h.in @@ -200,7 +200,7 @@ /* #undef HAVE_MPI */ /* Define to enable ARM NEON optimizations. */ -/* #undef HAVE_NEON */ +#cmakedefine HAVE_NEON 1 /* Define if OpenMP is enabled */ #cmakedefine HAVE_OPENMP From a2a0a788c92a4404061290e9e72b4d3144d6ea1e Mon Sep 17 00:00:00 2001 From: Mario Emmenlauer Date: Wed, 9 Mar 2022 22:10:47 +0100 Subject: [PATCH 3/3] CMakeLists.txt: Support multi-precision builds --- CMakeLists.txt | 241 +++++++++++++++++++++++++++---------------------- 1 file changed, 134 insertions(+), 107 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48e724f37..2dc3741c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,10 @@ option (ENABLE_OPENMP "Use OpenMP for multithreading" OFF) option (ENABLE_THREADS "Use pthread for multithreading" OFF) option (WITH_COMBINED_THREADS "Merge thread library" OFF) -option (ENABLE_FLOAT "single-precision" OFF) -option (ENABLE_LONG_DOUBLE "long-double precision" OFF) -option (ENABLE_QUAD_PRECISION "quadruple-precision" OFF) +option (ENABLE_FLOAT "Build the single precision library" OFF) +option (ENABLE_DOUBLE "Build the double precision library" ON) +option (ENABLE_LONG_DOUBLE "Build the long double precision library" OFF) +option (ENABLE_QUAD_PRECISION "Build the quadruple precision library" OFF) option (ENABLE_SSE "Compile with SSE instruction set support" OFF) option (ENABLE_SSE2 "Compile with SSE2 instruction set support" OFF) @@ -299,92 +300,132 @@ endif () set (FFTW_VERSION 3.3.10) -set (PREC_SUFFIX) -if (ENABLE_FLOAT) - set (FFTW_SINGLE TRUE) - set (BENCHFFT_SINGLE TRUE) - set (PREC_SUFFIX f) -endif () +set (PRECISIONS SINGLE DOUBLE LDOUBLE QUAD) +foreach (PRECISION ${PRECISIONS}) + # Initialize all parameters to false: + foreach (TMPPRECISION ${PRECISIONS}) + set (FFTW_${TMPPRECISION} FALSE) + set (BENCHFFT_${TMPPRECISION} FALSE) + endforeach () -if (ENABLE_LONG_DOUBLE) - set (FFTW_LDOUBLE TRUE) - set (BENCHFFT_LDOUBLE TRUE) - set (PREC_SUFFIX l) -endif () + if ("${PRECISION}" STREQUAL "SINGLE") + if (NOT "${ENABLE_FLOAT}") + continue () + endif() + set (PREC_SUFFIX f) + elseif ("${PRECISION}" STREQUAL "DOUBLE") + if (NOT "${ENABLE_DOUBLE}") + continue () + endif() + set (PREC_SUFFIX) + elseif ("${PRECISION}" STREQUAL "LDOUBLE") + if (NOT "${ENABLE_LONG_DOUBLE}") + continue () + endif() + set (PREC_SUFFIX l) + elseif ("${PRECISION}" STREQUAL "QUAD") + if (NOT "${ENABLE_QUAD_PRECISION}") + continue () + endif() + set (PREC_SUFFIX q) + endif () -if (ENABLE_QUAD_PRECISION) - set (FFTW_QUAD TRUE) - set (BENCHFFT_QUAD TRUE) - set (PREC_SUFFIX q) -endif () -set (fftw3_lib fftw3${PREC_SUFFIX}) + # Enable only one precision per iteration: + set (FFTW_${PRECISION} TRUE) + set (BENCHFFT_${PRECISION} TRUE) -configure_file (cmake.config.h.in config.h @ONLY) -include_directories (${CMAKE_CURRENT_BINARY_DIR}) -if (BUILD_SHARED_LIBS) - add_definitions (-DFFTW_DLL) -endif () -add_library (${fftw3_lib} ${SOURCEFILES}) -target_include_directories (${fftw3_lib} INTERFACE $) -if (MSVC AND NOT (CMAKE_C_COMPILER_ID STREQUAL "Intel")) - target_compile_definitions (${fftw3_lib} PRIVATE /bigobj) -endif () -if (HAVE_SSE) - target_compile_options (${fftw3_lib} PRIVATE ${SSE_FLAG}) -endif () -if (HAVE_SSE2) - target_compile_options (${fftw3_lib} PRIVATE ${SSE2_FLAG}) -endif () -if (HAVE_AVX) - target_compile_options (${fftw3_lib} PRIVATE ${AVX_FLAG}) -endif () -if (HAVE_AVX2) - target_compile_options (${fftw3_lib} PRIVATE ${AVX2_FLAG}) -endif () -if (HAVE_FMA) - target_compile_options (${fftw3_lib} PRIVATE ${FMA_FLAG}) -endif () -if (HAVE_LIBM) - target_link_libraries (${fftw3_lib} m) -endif () + set (fftw3_lib fftw3${PREC_SUFFIX}) + add_library (${fftw3_lib} ${SOURCEFILES}) + list (APPEND FFTW3_LIBRARIES "${fftw3_lib}") -set (subtargets ${fftw3_lib}) + configure_file (cmake.config.h.in "${PRECISION}/config.h" @ONLY) + target_include_directories (${fftw3_lib} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${PRECISION}") -if (Threads_FOUND) - if (WITH_COMBINED_THREADS) - target_link_libraries (${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT}) - else () - add_library (${fftw3_lib}_threads ${fftw_threads_SOURCE}) - target_include_directories (${fftw3_lib}_threads INTERFACE $) - target_link_libraries (${fftw3_lib}_threads ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_threads ${CMAKE_THREAD_LIBS_INIT}) - list (APPEND subtargets ${fftw3_lib}_threads) + target_include_directories (${fftw3_lib} INTERFACE $) + + if (BUILD_SHARED_LIBS) + target_compile_definitions (${fftw3_lib} PRIVATE -DFFTW_DLL) endif () -endif () -if (OPENMP_FOUND) - add_library (${fftw3_lib}_omp ${fftw_omp_SOURCE}) - target_include_directories (${fftw3_lib}_omp INTERFACE $) - target_link_libraries (${fftw3_lib}_omp ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_omp ${CMAKE_THREAD_LIBS_INIT}) - list (APPEND subtargets ${fftw3_lib}_omp) - target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS}) -endif () + if (MSVC AND NOT (CMAKE_C_COMPILER_ID STREQUAL "Intel")) + target_compile_definitions (${fftw3_lib} PRIVATE /bigobj) + endif () + if (HAVE_SSE) + target_compile_options (${fftw3_lib} PRIVATE ${SSE_FLAG}) + endif () + if (HAVE_SSE2) + target_compile_options (${fftw3_lib} PRIVATE ${SSE2_FLAG}) + endif () + if (HAVE_AVX) + target_compile_options (${fftw3_lib} PRIVATE ${AVX_FLAG}) + endif () + if (HAVE_AVX2) + target_compile_options (${fftw3_lib} PRIVATE ${AVX2_FLAG}) + endif () + if (HAVE_FMA) + target_compile_options (${fftw3_lib} PRIVATE ${FMA_FLAG}) + endif () + if (HAVE_LIBM) + target_link_libraries (${fftw3_lib} m) + endif () + + set (subtargets ${fftw3_lib}) + + if (Threads_FOUND) + if (WITH_COMBINED_THREADS) + target_link_libraries (${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT}) + else () + add_library (${fftw3_lib}_threads ${fftw_threads_SOURCE}) + target_include_directories (${fftw3_lib}_threads PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${PRECISION}") + target_include_directories (${fftw3_lib}_threads INTERFACE $) + target_link_libraries (${fftw3_lib}_threads ${fftw3_lib}) + target_link_libraries (${fftw3_lib}_threads ${CMAKE_THREAD_LIBS_INIT}) + list (APPEND subtargets ${fftw3_lib}_threads) + endif () + endif () + + if (OPENMP_FOUND) + add_library (${fftw3_lib}_omp ${fftw_omp_SOURCE}) + target_include_directories (${fftw3_lib}_omp PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${PRECISION}") + target_include_directories (${fftw3_lib}_omp INTERFACE $) + target_link_libraries (${fftw3_lib}_omp ${fftw3_lib}) + target_link_libraries (${fftw3_lib}_omp ${CMAKE_THREAD_LIBS_INIT}) + list (APPEND subtargets ${fftw3_lib}_omp) + target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS}) + endif () + + foreach(subtarget ${subtargets}) + set_target_properties (${subtarget} PROPERTIES SOVERSION 3.6.9 VERSION 3) + install (TARGETS ${subtarget} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + endforeach () + + if (BUILD_TESTS) + add_executable (bench${PREC_SUFFIX} ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c) + target_include_directories (bench${PREC_SUFFIX} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${PRECISION}") + + if (ENABLE_THREADS AND NOT WITH_COMBINED_THREADS) + target_link_libraries (bench${PREC_SUFFIX} ${fftw3_lib}_threads) + else () + target_link_libraries (bench${PREC_SUFFIX} ${fftw3_lib}) + endif () -foreach(subtarget ${subtargets}) - set_target_properties (${subtarget} PROPERTIES SOVERSION 3.6.9 VERSION 3) - install (TARGETS ${subtarget} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + enable_testing () + + if (Threads_FOUND) + macro (fftw_add_test problem) + add_test (NAME ${problem}${PREC_SUFFIX} COMMAND bench${PREC_SUFFIX} -s ${problem}) + endmacro () + + fftw_add_test (32x64) + fftw_add_test (ib256) + endif () + endif () endforeach () -install(TARGETS ${fftw3_lib} - EXPORT FFTW3LibraryDepends - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) install (FILES api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f) @@ -398,26 +439,11 @@ if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f03.in) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) endif () -if (BUILD_TESTS) - add_executable (bench ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c) - - if (ENABLE_THREADS AND NOT WITH_COMBINED_THREADS) - target_link_libraries (bench ${fftw3_lib}_threads) - else () - target_link_libraries (bench ${fftw3_lib}) - endif () - - enable_testing () - - if (Threads_FOUND) - macro (fftw_add_test problem) - add_test (NAME ${problem} COMMAND bench -s ${problem}) - endmacro () - - fftw_add_test (32x64) - fftw_add_test (ib256) - endif () -endif () +install(TARGETS ${FFTW3_LIBRARIES} + EXPORT FFTW3LibraryDepends + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) # pkgconfig file set (prefix ${CMAKE_INSTALL_PREFIX}) @@ -431,18 +457,19 @@ install (FILES DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig COMPONENT Development) -# cmake file -set (FFTW3_LIBRARIES "FFTW3::${fftw3_lib}") -configure_file (FFTW3Config.cmake.in FFTW3${PREC_SUFFIX}Config.cmake @ONLY) -configure_file (FFTW3ConfigVersion.cmake.in FFTW3${PREC_SUFFIX}ConfigVersion.cmake @ONLY) +# cmake configuration file +configure_file (FFTW3Config.cmake.in FFTW3Config.cmake @ONLY) +configure_file (FFTW3ConfigVersion.cmake.in FFTW3ConfigVersion.cmake @ONLY) install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} + "${CMAKE_CURRENT_BINARY_DIR}/FFTW3Config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/FFTW3ConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/fftw3" COMPONENT Development) -export (TARGETS ${fftw3_lib} NAMESPACE FFTW3:: FILE ${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake) +export (TARGETS ${FFTW3_LIBRARIES} + NAMESPACE FFTW3:: + FILE "${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake") install(EXPORT FFTW3LibraryDepends NAMESPACE FFTW3:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/fftw3" COMPONENT Development)