From 571c5695f48d757f69913055a5258545247ca4be Mon Sep 17 00:00:00 2001 From: Peter Doak Date: Wed, 10 Sep 2025 17:25:18 -0400 Subject: [PATCH] this gets us passing tests on CUDA 12.8 --- include/dca/linalg/lapack/magma.hpp | 34 ++-- include/dca/linalg/matrixop.hpp | 23 ++- include/dca/linalg/util/cast_gpu.hpp | 62 +++++-- test/unit/linalg/CMakeLists.txt | 2 +- test/unit/linalg/matrixop_cpu_gpu_test.cpp | 5 +- test/unit/linalg/matrixop_real_gpu_test.cpp | 13 +- test/unit/linalg/util/CMakeLists.txt | 2 +- .../unit/linalg/util/complex_op_cuda_test.cpp | 13 +- .../four_point_parameters/CMakeLists.txt | 2 +- tools/emacs/dca-style.el | 154 +++++++++--------- 10 files changed, 180 insertions(+), 130 deletions(-) diff --git a/include/dca/linalg/lapack/magma.hpp b/include/dca/linalg/lapack/magma.hpp index bc43564bf..fe2da8bb0 100644 --- a/include/dca/linalg/lapack/magma.hpp +++ b/include/dca/linalg/lapack/magma.hpp @@ -22,7 +22,6 @@ #endif #include "dca/linalg/lapack/lapack.hpp" - #include "dca/linalg/util/cast_gpu.hpp" // C++ wrappers @@ -103,7 +102,7 @@ inline void getri_gpu(int n, std::complex* a, int lda, int* ipiv, std::co inline void getri_gpu(int n, std::complex* a, int lda, int* ipiv, std::complex* work, int lwork) { checkErrorsCudaDebug(); - + auto cu_a = util::castMAGMAComplex(a); auto cu_work = util::castMAGMAComplex(work); @@ -151,8 +150,9 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m int* ldc, const int batch_count, const magma_queue_t queue) { using util::castMAGMAComplex; magmablas_cgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - *castMAGMAComplex(&alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), - ldb, *castMAGMAComplex(&beta), castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaComplex(alpha), castMAGMAComplex(a), lda, + castMAGMAComplex(b), ldb, convertToMagmaComplex(beta), + castMAGMAComplex(c), ldc, batch_count, queue); checkErrorsCudaDebug(); } inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m, int* n, int* k, @@ -163,8 +163,9 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m int* ldc, const int batch_count, const magma_queue_t queue) { using util::castMAGMAComplex; magmablas_zgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - *castMAGMAComplex(&alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), - ldb, *castMAGMAComplex(&beta), castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaComplex(alpha), castMAGMAComplex(a), lda, + castMAGMAComplex(b), ldb, convertToMagmaComplex(beta), + castMAGMAComplex(c), ldc, batch_count, queue); checkErrorsCudaDebug(); } @@ -202,8 +203,8 @@ inline void magmablas_gemm_vbatched_max_nocheck( using util::castMAGMAComplex; magmablas_cgemm_vbatched_max_nocheck( toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castMAGMAComplex(alpha), - castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, *castMAGMAComplex(beta), castMAGMAComplex(c), - ldc, batch_count, m_max, n_max, k_max, queue); + castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, *castMAGMAComplex(beta), + castMAGMAComplex(c), ldc, batch_count, m_max, n_max, k_max, queue); checkErrorsCudaDebug(); } @@ -214,9 +215,9 @@ inline void magmablas_gemm_vbatched_max_nocheck( const int m_max, const int n_max, const int k_max, magma_queue_t queue) { using util::castMAGMAComplex; magmablas_zgemm_vbatched_max_nocheck( - toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, *castMAGMAComplex(alpha), - castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, *castMAGMAComplex(beta), castMAGMAComplex(c), - ldc, batch_count, m_max, n_max, k_max, queue); + toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, convertToMagmaType(alpha), + castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, convertToMagmaType(beta), + castMAGMAComplex(c), ldc, batch_count, m_max, n_max, k_max, queue); checkErrorsCudaDebug(); } @@ -246,8 +247,9 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const int ldc, const int batch_count, const magma_queue_t queue) { using util::castMAGMAComplex; magmablas_cgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - *castMAGMAComplex(alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, - *castMAGMAComplex(beta), castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaType(alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), + ldb, convertToMagmaType(beta), castMAGMAComplex(c), ldc, batch_count, + queue); checkErrorsCudaDebug(); } inline void magmablas_gemm_batched(const char transa, const char transb, const int m, const int n, @@ -256,10 +258,10 @@ inline void magmablas_gemm_batched(const char transa, const char transb, const i const std::complex* const* b, const int ldb, const std::complex beta, std::complex** c, const int ldc, const int batch_count, const magma_queue_t queue) { - using util::castMAGMAComplex; + using dca::util::castMagmaType; magmablas_zgemm_batched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - *castMAGMAComplex(alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), ldb, - *castMAGMAComplex(beta), castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaType(alpha), castMagmaType(a), lda, castMagmaType(b), ldb, + convertToMagmaType(beta), castMagmaType(c), ldc, batch_count, queue); checkErrorsCudaDebug(); } diff --git a/include/dca/linalg/matrixop.hpp b/include/dca/linalg/matrixop.hpp index cc5d8c32e..9db5f6731 100644 --- a/include/dca/linalg/matrixop.hpp +++ b/include/dca/linalg/matrixop.hpp @@ -223,25 +223,36 @@ auto difference(const Matrix& a, const Matrix -auto difference(const Matrix& a, const Matrix& b, + +template +auto difference(const Matrix& a, const Matrix& b, double diff_threshold = 1e-3) { Matrix cp_a(a); return difference(cp_a, b, diff_threshold); } -template -auto difference(const Matrix& a, const Matrix& b, + +template +auto difference(const Matrix& a, const Matrix& b, double diff_threshold = 1e-3) { Matrix cp_b(b); return difference(a, cp_b, diff_threshold); } +template +auto difference(const Matrix& a, const Matrix& b, + double diff_threshold = 1e-3) { + Matrix cp_a(a); + Matrix cp_b(b); + return difference(cp_a, cp_b, diff_threshold); +} + // Returns the real part of a matrix. // In: a // TODO test. @@ -314,7 +325,7 @@ void inverse(MatrixType>& mat, Vector class ALLOC, +template class ALLOC, template class MatrixType> void inverse(MatrixType>& mat) { Vector ipiv; diff --git a/include/dca/linalg/util/cast_gpu.hpp b/include/dca/linalg/util/cast_gpu.hpp index 130435403..b764e0c85 100644 --- a/include/dca/linalg/util/cast_gpu.hpp +++ b/include/dca/linalg/util/cast_gpu.hpp @@ -18,14 +18,12 @@ #include "dca/config/haves_defines.hpp" #include "dca/platform/dca_gpu_complex.h" +#include "dca/util/type_mapping.hpp" #include -namespace dca { -namespace linalg { -namespace util { -// dca::linalg::util:: +namespace dca::linalg::util { -#if defined(DCA_HAVE_CUDA) +#if defined(DCA_HAVE_CUDA) // returns a cuComplex pointer. inline cuComplex** castCudaComplex(std::complex** ptr) { return reinterpret_cast(ptr); @@ -139,16 +137,16 @@ inline const magmaFloatComplex* const* castMAGMAComplex(const std::complex* ptr) { return reinterpret_cast(ptr); -} +} inline const magmaFloatComplex* castMAGMAComplex(const std::complex& el) { return castMAGMAComplex(&el); } #ifdef DCA_HAVE_CUDA - #define cublasDoubleComplex cuDoubleComplex - #define cublasComplex cuComplex +#define cublasDoubleComplex cuDoubleComplex +#define cublasComplex cuComplex #endif - + inline cublasDoubleComplex** castCUBLASComplex(std::complex** ptr) { return reinterpret_cast(ptr); } @@ -181,11 +179,10 @@ inline const cublasComplex* const* castCUBLASComplex(const std::complex* } inline const cublasComplex* castCUBLASComplex(const std::complex* ptr) { return reinterpret_cast(ptr); -} +} inline const cublasComplex* castCUBLASComplex(const std::complex& el) { return castCUBLASComplex(&el); } - // Provides a templated typedef. namespace details { @@ -201,13 +198,46 @@ struct ComplexContainer { using type = cuComplex; }; } // namespace details -// dca::linalg::util:: - template using CudaComplex = typename details::ComplexContainer::type; +} // namespace dca::linalg::util + +inline double2 convertToMagmaType(std::complex var) { + return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; +} + +inline float2 convertToMagmaType(std::complex var) { + return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; +} + +namespace dca::util { +template +using MAGMATypeMap = typename std::disjunction< + OnTypesEqual, OnTypesEqual, OnTypesEqual, + OnTypesEqual, OnTypesEqual, + OnTypesEqual, OnTypesEqual, + OnTypesEqual, OnTypesEqual, + OnTypesEqual, + OnTypesEqual*, magmaDoubleComplex*>, + OnTypesEqual**, magmaFloatComplex**>, + OnTypesEqual**, magmaDoubleComplex**>, + OnTypesEqual*, magmaFloatComplex*>, + OnTypesEqual, OnTypesEqual, + OnTypesEqual*, const magmaDoubleComplex*>, + OnTypesEqual*, const magmaFloatComplex*>, + OnTypesEqual&, const magmaDoubleComplex&>, + OnTypesEqual&, const magmaFloatComplex&>, + OnTypesEqual**, const magmaFloatComplex**>, + OnTypesEqual**, const magmaDoubleComplex**>, + OnTypesEqual* const*, const magmaFloatComplex* const*>, + OnTypesEqual* const*, const magmaDoubleComplex* const*>, + default_type>::type; + +template +__device__ __host__ MAGMATypeMap castMagmaType(T var) { + return reinterpret_cast>(var); +} -} // namespace util -} // namespace linalg -} // namespace dca +} // namespace dca::util #endif // DCA_LINALG_UTIL_CAST_CUDA_HPP diff --git a/test/unit/linalg/CMakeLists.txt b/test/unit/linalg/CMakeLists.txt index daf550d02..840cfc3fc 100644 --- a/test/unit/linalg/CMakeLists.txt +++ b/test/unit/linalg/CMakeLists.txt @@ -41,7 +41,7 @@ dca_add_gtest(matrixop_cpu_gpu_test dca_add_gtest(matrixop_real_gpu_test GTEST_MAIN CUDA - LIBS ${DCA_LIBS} lapack gpu_utils magma::magma lapack_kernels blas_kernels) + LIBS ${DCA_LIBS} lapack gpu_utils magma::magma BLAS::BLAS lapack_kernels blas_kernels) # lapack_kernels blas_kernels lapack_kernels dca_add_gtest(matrixop_complex_gpu_test diff --git a/test/unit/linalg/matrixop_cpu_gpu_test.cpp b/test/unit/linalg/matrixop_cpu_gpu_test.cpp index 5e06fcf02..8a1324fce 100644 --- a/test/unit/linalg/matrixop_cpu_gpu_test.cpp +++ b/test/unit/linalg/matrixop_cpu_gpu_test.cpp @@ -18,6 +18,7 @@ #include "dca/testing/gtest_h_w_warning_blocking.h" #include "dca/linalg/blas/blas3.hpp" #include "dca/linalg/matrix.hpp" +#include "dca/linalg/util/allocators/pinned_allocator.hpp" #include "cpu_test_util.hpp" #include "gpu_test_util.hpp" @@ -28,14 +29,14 @@ TEST(MatrixopCPUGPUTest, difference) { auto val_a = [](int i, int j) { return 10 * i + j; }; - dca::linalg::Matrix a(size2_a); + dca::linalg::Matrix> a(size2_a); testing::setMatrixElements(a, val_a); dca::linalg::Matrix da(a); for (int sg : {1, -1}) for (int ia : {0, 1, 4}) for (int ja : {0, 2, 3}) { - dca::linalg::Matrix b(a); + dca::linalg::Matrix> b(a); b(ia, ja) += sg * diff; double err = std::abs(epsilon * b(ia, ja)); diff --git a/test/unit/linalg/matrixop_real_gpu_test.cpp b/test/unit/linalg/matrixop_real_gpu_test.cpp index e5ce38e6d..2e32e207a 100644 --- a/test/unit/linalg/matrixop_real_gpu_test.cpp +++ b/test/unit/linalg/matrixop_real_gpu_test.cpp @@ -31,7 +31,8 @@ class MatrixopRealGPUTest : public ::testing::Test { static const ScalarType epsilon; }; template -const ScalarType MatrixopRealGPUTest::epsilon = std::numeric_limits::epsilon(); +const ScalarType MatrixopRealGPUTest::epsilon = + std::numeric_limits::epsilon(); typedef ::testing::Types FloatingPointTypes; TYPED_TEST_CASE(MatrixopRealGPUTest, FloatingPointTypes); @@ -765,22 +766,26 @@ TEST(MatrixopGPUTest, Difference) { auto val_a = [](int i, int j) { return 10 * i + j; }; - dca::linalg::Matrix a(size2_a); + dca::linalg::Matrix> a(size2_a); testing::setMatrixElements(a, val_a); dca::linalg::Matrix da(a); for (int sg : {1, -1}) for (int ia : {0, 1, 4}) for (int ja : {0, 2, 3}) { - dca::linalg::Matrix b(a); + dca::linalg::Matrix> b(a); b(ia, ja) += sg * diff; double err = std::abs(epsilon * b(ia, ja)); dca::linalg::Matrix db(b); - + // To make this clear the difference calls are expected to show + // differences! EXPECT_NEAR(diff, dca::linalg::matrixop::difference(da, db, 2 * diff), err); EXPECT_NEAR(diff, dca::linalg::matrixop::difference(da, db, diff + err), err); auto diffcalc = dca::linalg::matrixop::difference(da, db, 2 * diff); EXPECT_NEAR(diff, dca::linalg::matrixop::difference(da, db, diffcalc), err); + // This will result on output even though we expect and want a + // throw. + std::cerr << "difference output expected below\n"; EXPECT_THROW(dca::linalg::matrixop::difference(da, db, diffcalc - err), std::logic_error); } } diff --git a/test/unit/linalg/util/CMakeLists.txt b/test/unit/linalg/util/CMakeLists.txt index d5d54352e..4eef721ed 100644 --- a/test/unit/linalg/util/CMakeLists.txt +++ b/test/unit/linalg/util/CMakeLists.txt @@ -29,7 +29,7 @@ if(DCA_HAVE_CUDA) dca_add_gtest(complex_op_cuda_test GTEST_MAIN CUDA - LIBS ${DCA_GPU_LIBS}) + LIBS ${DCA_GPU_LIBS} magma::magma BLAS::BLAS) dca_gpu_runtime_link(complex_op_cuda_test) endif() diff --git a/test/unit/linalg/util/complex_op_cuda_test.cpp b/test/unit/linalg/util/complex_op_cuda_test.cpp index 61ec68db5..78830729b 100644 --- a/test/unit/linalg/util/complex_op_cuda_test.cpp +++ b/test/unit/linalg/util/complex_op_cuda_test.cpp @@ -13,23 +13,24 @@ #include "dca/linalg/util/complex_operators_cuda.cu.hpp" #include #include "dca/testing/gtest_h_w_warning_blocking.h" +#include TEST(ComplexOpCuda, Assign) { double2 d1_a{0.0, 0.0}; - double2 d2_a{1.0,2.0}; - dca::linalg::assign(d1_a,d2_a); + double2 d2_a{1.0, 2.0}; + dca::linalg::assign(d1_a, d2_a); EXPECT_EQ(d1_a.x, d2_a.x); EXPECT_EQ(d1_a.y, d2_a.y); - std::complex c1{1.3,2.4}; - dca::linalg::assign(d1_a,c1); + std::complex c1{1.3, 2.4}; + dca::linalg::assign(d1_a, c1); EXPECT_EQ(d1_a.x, c1.real()); EXPECT_EQ(d1_a.y, c1.imag()); - std::complex c2{0.0,0.5}; + std::complex c2{0.0, 0.5}; dca::linalg::assign(c2, d1_a); EXPECT_EQ(c2.real(), c1.real()); - EXPECT_EQ(c2.imag(),c1.imag()); + EXPECT_EQ(c2.imag(), c1.imag()); std::int8_t i81 = 1; dca::linalg::assign(d1_a, i81); diff --git a/test/unit/phys/parameters/four_point_parameters/CMakeLists.txt b/test/unit/phys/parameters/four_point_parameters/CMakeLists.txt index 8b5395b3d..b169f65f4 100644 --- a/test/unit/phys/parameters/four_point_parameters/CMakeLists.txt +++ b/test/unit/phys/parameters/four_point_parameters/CMakeLists.txt @@ -3,4 +3,4 @@ # why this test pulls in a bunch of magma references is a mystery dca_add_gtest(four_point_parameters_test GTEST_MAIN - LIBS json enumerations ${DCA_GPU_LIBS}) + LIBS json enumerations ${DCA_GPU_LIBS} magma::magma) diff --git a/tools/emacs/dca-style.el b/tools/emacs/dca-style.el index f8c037533..8ecd7f1f8 100644 --- a/tools/emacs/dca-style.el +++ b/tools/emacs/dca-style.el @@ -1,4 +1,4 @@ -;;; dca-style.el -- defines a c-style for DCA++ +;;; dca-style.el -- defines a c-style for DCA++ -*- lexical-binding: t; -*- ;;; License: ;; // Copyright (C) 2021 ETH Zurich @@ -32,82 +32,82 @@ ;;; Code: (defconst dca-c-style - '((c-basic-offset . 2) ; Guessed value - (c-offsets-alist - (access-label . 0) ; Guessed value - (arglist-cont . 0) ; Guessed value - (arglist-intro . ++) ; Guessed value - (block-close . 0) ; Guessed value - (catch-clause . 0) ; Guessed value - (class-close . 0) ; Guessed value - (defun-block-intro . +) ; Guessed value - (defun-close . 0) ; Guessed value - (else-clause . 0) ; Guessed value - (inclass . +) ; Guessed value - (inline-close . 0) ; Guessed value - (innamespace . 0) ; Guessed value - (member-init-cont . c-lineup-multi-inher) ; Guessed value - (member-init-intro . +) ; Guessed value - (namespace-close . 0) ; Guessed value - (statement . 0) ; Guessed value - (statement-block-intro . +) ; Guessed value - (statement-cont . +) ; Guessed value - (substatement . +) ; Guessed value - (topmost-intro . +) ; Guessed value - (topmost-intro-cont . 0) ; Guessed value - (annotation-top-cont . 0) - (annotation-var-cont . +) - (arglist-close . c-lineup-close-paren) - (arglist-cont-nonempty . c-lineup-arglist) - (block-open . 0) - (brace-entry-open . 0) - (brace-list-close . 0) - (brace-list-entry . c-lineup-under-anchor) - (brace-list-intro . +) - (brace-list-open . 0) - (c . c-lineup-C-comments) - (case-label . 0) - (class-open . 0) - (comment-intro . c-lineup-comment) - (composition-close . 0) - (composition-open . 0) - (cpp-define-intro c-lineup-cpp-define +) - (cpp-macro . -1000) - (cpp-macro-cont . +) - (defun-open . 0) - (do-while-closure . 0) - (extern-lang-close . 0) - (extern-lang-open . 0) - (friend . 0) - (func-decl-cont . +) - (incomposition . +) - (inexpr-class . +) - (inexpr-statement . +) - (inextern-lang . +) - (inher-cont . c-lineup-multi-inher) - (inher-intro . +) - (inlambda . c-lineup-inexpr-block) - (inline-open . +) - (inmodule . +) - (knr-argdecl . 0) - (knr-argdecl-intro . +) - (label . 2) - (lambda-intro-cont . +) - (module-close . 0) - (module-open . 0) - (namespace-open . 0) - (objc-method-args-cont . c-lineup-ObjC-method-args) - (objc-method-call-cont c-lineup-ObjC-method-call-colons c-lineup-ObjC-method-call +) - (objc-method-intro . - [0]) - (statement-case-intro . +) - (statement-case-open . 0) - (stream-op . c-lineup-streamop) - (string . -1000) - (substatement-label . 2) - (substatement-open . +) - (template-args-cont c-lineup-template-args +) - (cpp-macro . -1000))) + '((c-basic-offset . 2) ; Guessed value + (c-offsets-alist + (access-label . 0) ; Guessed value + (arglist-cont . 0) ; Guessed value + (arglist-intro . ++) ; Guessed value + (block-close . 0) ; Guessed value + (catch-clause . 0) ; Guessed value + (class-close . 0) ; Guessed value + (defun-block-intro . +) ; Guessed value + (defun-close . 0) ; Guessed value + (else-clause . 0) ; Guessed value + (inclass . +) ; Guessed value + (inline-close . 0) ; Guessed value + (innamespace . 0) ; Guessed value + (member-init-cont . c-lineup-multi-inher) ; Guessed value + (member-init-intro . +) ; Guessed value + (namespace-close . 0) ; Guessed value + (statement . 0) ; Guessed value + (statement-block-intro . +) ; Guessed value + (statement-cont . +) ; Guessed value + (substatement . +) ; Guessed value + (topmost-intro . +) ; Guessed value + (topmost-intro-cont . 0) ; Guessed value + (annotation-top-cont . 0) + (annotation-var-cont . +) + (arglist-close . c-lineup-close-paren) + (arglist-cont-nonempty . c-lineup-arglist) + (block-open . 0) + (brace-entry-open . 0) + (brace-list-close . 0) + (brace-list-entry . c-lineup-under-anchor) + (brace-list-intro . +) + (brace-list-open . 0) + (c . c-lineup-C-comments) + (case-label . 0) + (class-open . 0) + (comment-intro . c-lineup-comment) + (composition-close . 0) + (composition-open . 0) + (cpp-define-intro c-lineup-cpp-define +) + (cpp-macro . -1000) + (cpp-macro-cont . +) + (defun-open . 0) + (do-while-closure . 0) + (extern-lang-close . 0) + (extern-lang-open . 0) + (friend . 0) + (func-decl-cont . +) + (incomposition . +) + (inexpr-class . +) + (inexpr-statement . +) + (inextern-lang . +) + (inher-cont . c-lineup-multi-inher) + (inher-intro . +) + (inlambda . c-lineup-inexpr-block) + (inline-open . +) + (inmodule . +) + (knr-argdecl . 0) + (knr-argdecl-intro . +) + (label . 2) + (lambda-intro-cont . +) + (module-close . 0) + (module-open . 0) + (namespace-open . 0) + (objc-method-args-cont . c-lineup-ObjC-method-args) + (objc-method-call-cont c-lineup-ObjC-method-call-colons c-lineup-ObjC-method-call +) + (objc-method-intro . + [0]) + (statement-case-intro . +) + (statement-case-open . 0) + (stream-op . c-lineup-streamop) + (string . -1000) + (substatement-label . 2) + (substatement-open . +) + (template-args-cont c-lineup-template-args +) + (cpp-macro . -1000))) "DCA++ C/C++ programming Style.") (c-add-style "dca" dca-c-style)