diff --git a/.github/workflows/ci-github-actions-self-hosted.yaml b/.github/workflows/ci-github-actions-self-hosted.yaml index 3c342950a..073880006 100644 --- a/.github/workflows/ci-github-actions-self-hosted.yaml +++ b/.github/workflows/ci-github-actions-self-hosted.yaml @@ -9,8 +9,8 @@ on: - master jobs: - v100: - runs-on: [self-hosted, Linux, X64, V100, CUDA] + two_a30: + runs-on: [self-hosted, Linux, X64, A30] env: GH_JOBNAME: ${{matrix.jobname}} @@ -18,11 +18,9 @@ jobs: strategy: fail-fast: false matrix: - jobname: [ - GCC12-MPI-CUDA-Real-Full, - ] -# GCC12-MPI-NoMPI-CUDA-Real-Full, -# GCC12-MPI-NoMPI-CUDA-Real-Debug-Full, + jobname: [GCC12-MPI-CUDA-Real-Full] + # GCC12-MPI-NoMPI-CUDA-Real-Full, + # GCC12-MPI-NoMPI-CUDA-Real-Debug-Full, steps: - name: Checkout PR branch @@ -36,4 +34,3 @@ jobs: - name: Test run: test/test_automation/ci/run_step.sh test - diff --git a/include/dca/linalg/lapack/magma.hpp b/include/dca/linalg/lapack/magma.hpp index fe2da8bb0..e838ffc48 100644 --- a/include/dca/linalg/lapack/magma.hpp +++ b/include/dca/linalg/lapack/magma.hpp @@ -150,9 +150,9 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m int* ldc, const int batch_count, const magma_queue_t queue) { using util::castMAGMAComplex; magmablas_cgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - convertToMagmaComplex(alpha), castMAGMAComplex(a), lda, - castMAGMAComplex(b), ldb, convertToMagmaComplex(beta), - castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaType(alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), + ldb, convertToMagmaType(beta), castMAGMAComplex(c), ldc, batch_count, + queue); checkErrorsCudaDebug(); } inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m, int* n, int* k, @@ -163,9 +163,9 @@ inline void magmablas_gemm_vbatched(const char transa, const char transb, int* m int* ldc, const int batch_count, const magma_queue_t queue) { using util::castMAGMAComplex; magmablas_zgemm_vbatched(toMagmaTrans(transa), toMagmaTrans(transb), m, n, k, - convertToMagmaComplex(alpha), castMAGMAComplex(a), lda, - castMAGMAComplex(b), ldb, convertToMagmaComplex(beta), - castMAGMAComplex(c), ldc, batch_count, queue); + convertToMagmaType(alpha), castMAGMAComplex(a), lda, castMAGMAComplex(b), + ldb, convertToMagmaType(beta), castMAGMAComplex(c), ldc, batch_count, + queue); checkErrorsCudaDebug(); } diff --git a/include/dca/linalg/matrixop.hpp b/include/dca/linalg/matrixop.hpp index 9db5f6731..0679554f1 100644 --- a/include/dca/linalg/matrixop.hpp +++ b/include/dca/linalg/matrixop.hpp @@ -43,6 +43,7 @@ #include "dca/linalg/blas/use_device.hpp" #include "dca/linalg/lapack/use_device.hpp" #include "dca/linalg/matrix.hpp" +#include "dca/linalg/util/allocators/aligned_allocator.hpp" #include "dca/linalg/util/util_lapack.hpp" #include "dca/linalg/util/util_matrixop.hpp" #include "dca/linalg/vector.hpp" @@ -230,7 +231,6 @@ auto difference(const Matrix& a, const Matrix auto difference(const Matrix& a, const Matrix& b, double diff_threshold = 1e-3) { @@ -241,7 +241,7 @@ auto difference(const Matrix& a, const Matrix& template auto difference(const Matrix& a, const Matrix& b, double diff_threshold = 1e-3) { - Matrix cp_b(b); + Matrix cp_b(b); return difference(a, cp_b, diff_threshold); } @@ -354,7 +354,7 @@ void smallInverse(Matrix& m_inv, Vector& ipiv, break; } case 3: { - const Matrix m(m_inv); + const Matrix> m(m_inv); const Scalar det = m(0, 0) * (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)) - m(1, 0) * (m(0, 1) * m(2, 2) - m(0, 2) 
* m(2, 1)) + m(2, 0) * (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)); @@ -1005,7 +1005,8 @@ inline void multiplyDiagonalLeft(const Vector& d, template inline void multiplyDiagonalLeft(const Vector& d, const Matrix& a, Matrix& b, int thread_id = 0, int stream_id = 0) { - Vector d_gpu(d); + Vector d_gpu; + d_gpu.setAsync(d, thread_id, stream_id); multiplyDiagonalLeft(d_gpu, a, b, thread_id, stream_id); } @@ -1025,7 +1026,8 @@ inline void multiplyDiagonalRight(const Matrix& a, template inline void multiplyDiagonalRight(const Matrix& a, const Vector& d, Matrix& b, int thread_id = 0, int stream_id = 0) { - Vector d_gpu(d); + Vector d_gpu; + d_gpu.setAsync(d, thread_id, stream_id); multiplyDiagonalRight(a, d_gpu, b, thread_id, stream_id); } diff --git a/include/dca/linalg/util/cast_gpu.hpp b/include/dca/linalg/util/cast_gpu.hpp index b764e0c85..b6edab4a3 100644 --- a/include/dca/linalg/util/cast_gpu.hpp +++ b/include/dca/linalg/util/cast_gpu.hpp @@ -202,14 +202,24 @@ template using CudaComplex = typename details::ComplexContainer::type; } // namespace dca::linalg::util -inline double2 convertToMagmaType(std::complex var) { +inline magmaDoubleComplex convertToMagmaType(std::complex var) { return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; } -inline float2 convertToMagmaType(std::complex var) { +inline magmaFloatComplex convertToMagmaType(std::complex var) { return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; } +#ifdef DCA_HAVE_HIP +inline magmaFloatComplex convertToMagmaType(HIP_vector_type var) { + return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; +} + +inline magmaDoubleComplex convertToMagmaType(HIP_vector_type var) { + return {reinterpret_cast(var)[0], reinterpret_cast(var)[1]}; +} +#endif + namespace dca::util { template using MAGMATypeMap = typename std::disjunction< @@ -231,8 +241,13 @@ using MAGMATypeMap = typename std::disjunction< OnTypesEqual**, const magmaDoubleComplex**>, OnTypesEqual* const*, const magmaFloatComplex* const*>, OnTypesEqual* const*, const magmaDoubleComplex* const*>, + OnTypesEqual, + OnTypesEqual, +#ifdef DCA_HAVE_HIP + OnTypesEqual* const*, const magmaFloatComplex* const*>, + OnTypesEqual* const*, const magmaDoubleComplex* const*>, +#endif default_type>::type; - template __device__ __host__ MAGMATypeMap castMagmaType(T var) { return reinterpret_cast>(var); diff --git a/include/dca/linalg/vector.hpp b/include/dca/linalg/vector.hpp index 3e8882b77..a21fd0203 100644 --- a/include/dca/linalg/vector.hpp +++ b/include/dca/linalg/vector.hpp @@ -34,7 +34,7 @@ namespace dca { namespace linalg { // dca::linalg:: - template > class Vector : public Allocator { public: @@ -52,9 +52,8 @@ class Vector : public Allocator { /** copy constructor except for name. * this is strange but for historical reasons is kept. - * has needed to be explicit because with the `const ThisType&` somehow lead to an implicit conversion - * from an int to a Vector& argument that landed here. - * This occurred in Debug with + * has needed to be explicit because with the `const ThisType&` somehow lead to an implicit + * conversion from an int to a Vector& argument that landed here. This occurred in Debug with */ explicit Vector(const ThisType& rhs, const std::string& name = default_name_); @@ -323,7 +322,8 @@ void Vector::setAsync(const Container& rhs, } template -void Vector::setToZeroAsync(const util::GpuStream& stream [[maybe_unused]]) { +void Vector::setToZeroAsync(const util::GpuStream& stream + [[maybe_unused]]) { // TODO: implement in copy.hpp. 
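For reference, the convertToMagmaType overloads touched above in cast_gpu.hpp rely on std::complex<T> being array-oriented accessible (the standard guarantees reinterpret_cast<T(&)[2]>(z)[0] is the real part and [1] the imaginary part), so the value can be repacked into MAGMA's POD complex structs. A minimal sketch of that pattern, using stand-in structs in place of magmaFloatComplex/magmaDoubleComplex since the MAGMA headers are not pulled in here:

```cpp
#include <complex>
#include <iostream>

// Stand-ins for magmaFloatComplex / magmaDoubleComplex: plain {x, y} PODs.
struct FakeMagmaFloatComplex  { float  x, y; };
struct FakeMagmaDoubleComplex { double x, y; };

// Mirrors the cast_gpu.hpp pattern: read real/imag through an array view of
// the std::complex value and aggregate-initialize the MAGMA-style struct.
inline FakeMagmaDoubleComplex convertToMagmaType(std::complex<double> var) {
  return {reinterpret_cast<double(&)[2]>(var)[0], reinterpret_cast<double(&)[2]>(var)[1]};
}

inline FakeMagmaFloatComplex convertToMagmaType(std::complex<float> var) {
  return {reinterpret_cast<float(&)[2]>(var)[0], reinterpret_cast<float(&)[2]>(var)[1]};
}

int main() {
  std::complex<double> alpha(1.5, -2.0);
  auto m = convertToMagmaType(alpha);
  std::cout << m.x << " " << m.y << '\n';  // prints: 1.5 -2
}
```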
#ifdef DCA_HAVE_GPU checkRC(cudaMemsetAsync(data_, 0, size_ * sizeof(ScalarType), stream)); @@ -333,12 +333,14 @@ void Vector::setToZeroAsync(const util::GpuS } template -void Vector::setToZero(const util::GpuStream& stream [[maybe_unused]]) { +void Vector::setToZero(const util::GpuStream& stream + [[maybe_unused]]) { dca::linalg::util::Memory::setToZero(data_, size_, stream); } // template -// void Vector::setToZero(const util::GpuStream& stream [[maybe_unused]]) { +// void Vector::setToZero(const util::GpuStream& stream +// [[maybe_unused]]) { // // TODO: implement in copy.hpp. // dca::linalg::util::memory::setToZero(data_, size_, stream); // } diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp index 69ac57e18..4162d9d82 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/accumulator/ctint_accumulator.hpp @@ -35,8 +35,7 @@ namespace solver { namespace ctint { // dca::phys::solver::ctint:: -template +template class CtintAccumulator : public MC_accumulator_data { public: constexpr static ClusterSolverId solver_id{ClusterSolverId::CT_INT}; @@ -47,7 +46,7 @@ class CtintAccumulator : public MC_accumulator_data using Base::accumulated_phase_; using Base::current_phase_; using Base::number_of_measurements_; - + using ParametersType = Parameters; using DataType = phys::DcaData; using SpAccumulator = accumulator::SpAccumulator; @@ -112,7 +111,7 @@ class CtintAccumulator : public MC_accumulator_data int get_number_of_measurements() const { std::cout << "number_of_measurements ==" << number_of_measurements_ << '\n'; std::cout << "accumulated_phase_.count() == " << accumulated_phase_.count() << '\n'; - //assert(accumulated_phase_.count() == number_of_measurements_); + // assert(accumulated_phase_.count() == number_of_measurements_); return number_of_measurements_; } @@ -147,7 +146,6 @@ class CtintAccumulator : public MC_accumulator_data MatrixConfiguration configuration_; std::vector streams_; - linalg::util::GpuEvent event_; util::Accumulator accumulated_order_; @@ -170,7 +168,7 @@ class CtintAccumulator : public MC_accumulator_data template template CtintAccumulator::CtintAccumulator(const Parameters& pars, - const Data& data, int id) + const Data& data, int id) : parameters_(pars), thread_id_(id), sp_accumulator_(pars), @@ -187,7 +185,7 @@ void CtintAccumulator::initialize(const int dca_iterat parameters_.dump_every_iteration()); accumulated_order_.reset(); accumulated_phase_.reset(); - + Base::initialize(dca_iteration); sp_accumulator_.resetAccumulation(); sp_accumulator_.clearSingleMeasurement(); @@ -215,13 +213,7 @@ void CtintAccumulator::updateFrom(Walker& walker) { measure_flops_ = M_[0].nrCols() * M_[0].nrCols() * 2 * 2 * 8 * 19; if constexpr (device == linalg::GPU) { - for (int s = 0; s < 2; ++s) { - event_.record(walker.get_stream(s)); - // Synchronize sp accumulator streams with walker. - event_.block(*sp_accumulator_.get_streams()[s]); - // Synchronize both walker streams with tp accumulator. 
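The multiplyDiagonalLeft/multiplyDiagonalRight overloads above now stage the diagonal on the device with setAsync on the caller's thread/stream instead of constructing a device Vector from the host one, which would imply a blocking copy. A minimal sketch of that staging pattern with the raw CUDA runtime API; the helper name stageAsync and the surrounding scaffolding are illustrative, not the DCA++ interface, and the host buffer is assumed to be pinned so the copy can overlap:

```cpp
#include <cuda_runtime.h>
#include <cassert>
#include <cstddef>

// Enqueue a host->device copy on `stream` without blocking the host.
// The host buffer must stay alive (and ideally be pinned) until the
// stream has consumed it.
void stageAsync(const double* host, double* dev, std::size_t n, cudaStream_t stream) {
  cudaError_t rc = cudaMemcpyAsync(dev, host, n * sizeof(double),
                                   cudaMemcpyHostToDevice, stream);
  assert(rc == cudaSuccess);
}

int main() {
  constexpr std::size_t n = 1024;
  cudaStream_t stream;
  cudaStreamCreate(&stream);

  double* h_d = nullptr;  // pinned host buffer holding the diagonal
  cudaMallocHost(reinterpret_cast<void**>(&h_d), n * sizeof(double));
  for (std::size_t i = 0; i < n; ++i)
    h_d[i] = 1.0 + static_cast<double>(i);

  double* d_d = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&d_d), n * sizeof(double));

  stageAsync(h_d, d_d, n, stream);  // analogous to d_gpu.setAsync(d, thread_id, stream_id)
  // ... launch the multiplyDiagonal kernel on the same stream here ...
  cudaStreamSynchronize(stream);    // only needed before reusing or freeing h_d

  cudaFree(d_d);
  cudaFreeHost(h_d);
  cudaStreamDestroy(stream);
}
```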
- event_.block(*tp_accumulator_.get_stream()); - } + walker.synchronize(); } configuration_ = walker.getConfiguration(); diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp index adf4b72ad..6db6d167c 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/ctint_cluster_solver.hpp @@ -37,7 +37,7 @@ #include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_choice.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp" #include "dca/phys/dca_step/cluster_solver/shared_tools/interpolation/g0_interpolation.hpp" -//#include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp" +// #include "dca/phys/dca_step/cluster_solver/shared_tools/accumulation/time_correlator.hpp" #include "dca/phys/dca_data/dca_data.hpp" #include "dca/phys/dca_loop/dca_loop_data.hpp" #include "dca/phys/dca_step/symmetrization/symmetrize.hpp" @@ -58,7 +58,7 @@ class CtintClusterSolver { static constexpr ClusterSolverId solver_type{ClusterSolverId::CT_INT}; using Real = typename config::McOptions::MC_REAL; - using Scalar = typename dca::util::ScalarSelect::type; + using Scalar = typename dca::util::ScalarSelect::type; using Concurrency = typename Parameters::concurrency_type; using CDA = ClusterDomainAliases; @@ -111,8 +111,11 @@ class CtintClusterSolver { // Returns the function G(k,w) without averaging across MPI ranks. auto local_G_k_w() const; - DMatrixBuilder& getResource() { return *d_matrix_builder_; }; -protected: // thread jacket interface. + DMatrixBuilder& getResource() { + return *d_matrix_builder_; + }; + +protected: // thread jacket interface. using ParametersType = Parameters; using DataType = Data; using Rng = typename Parameters::random_number_generator; @@ -140,8 +143,8 @@ class CtintClusterSolver { /** gather all M and G4 and accumulated sign * \param[out] Returns: average phase - * \param[in,out] G greens function has allreduce or leaveoneoutSum applied to it - * side effect seems undesirable and motivated by saving copy. + * \param[in,out] G greens function has allreduce or leaveoneoutSum applied to + * it side effect seems undesirable and motivated by saving copy. * \param[in] compute_error does leave one out sum removing the local accumulated type. 
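For context, the accumulator lines removed above ordered the streams by recording an event on each walker stream (event_.record) and making the sp/tp accumulator streams wait on it (event_.block); the new code delegates that responsibility to walker.synchronize(). A sketch of the record/wait idiom those removed lines used, written against the plain CUDA runtime API with illustrative stream names:

```cpp
#include <cuda_runtime.h>

// Make `consumer` wait until all work currently enqueued on `producer`
// has finished, without blocking the host thread.
void orderStreams(cudaStream_t producer, cudaStream_t consumer, cudaEvent_t event) {
  cudaEventRecord(event, producer);         // marks the current tail of `producer`
  cudaStreamWaitEvent(consumer, event, 0);  // work enqueued on `consumer` after this waits
}

int main() {
  cudaStream_t walker_stream, accumulator_stream;
  cudaStreamCreate(&walker_stream);
  cudaStreamCreate(&accumulator_stream);

  cudaEvent_t ev;
  cudaEventCreateWithFlags(&ev, cudaEventDisableTiming);  // no timing -> cheaper event

  // ... enqueue walker kernels on walker_stream ...
  orderStreams(walker_stream, accumulator_stream, ev);
  // ... enqueue accumulation kernels on accumulator_stream; they run after the walker work ...

  cudaStreamSynchronize(accumulator_stream);
  cudaEventDestroy(ev);
  cudaStreamDestroy(walker_stream);
  cudaStreamDestroy(accumulator_stream);
}
```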
*/ auto gatherMAndG4(SpGreensFunction& M, bool compute_error) const; @@ -192,7 +195,7 @@ CtintClusterSolver::CtintClusterSolver( template void CtintClusterSolver::initialize(int dca_iteration) { dca_iteration_ = dca_iteration; - + g0_.initializeShrinked(data_.G0_r_t_cluster_excluded); d_matrix_builder_->setAlphas(parameters_.getAlphas(), parameters_.adjustAlphaDd()); @@ -202,7 +205,6 @@ void CtintClusterSolver::initialize(i if (concurrency_.id() == concurrency_.first()) std::cout << "\n\n\t CT-INT Integrator has initialized (DCA-iteration : " << dca_iteration << ")\n\n"; - } template @@ -353,10 +355,8 @@ void CtintClusterSolver::warmUp() { const int n_sweep = parameters_.get_warm_up_sweeps(); for (int i = 0; i < n_sweep; i++) { walker_->doSweep(); - walker_->updateShell(i, n_sweep); } - walker_->markThermalized(); } diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/structs/solver_configuration.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/structs/solver_configuration.hpp index c63d70f18..94d9753bc 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/structs/solver_configuration.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/structs/solver_configuration.hpp @@ -33,6 +33,8 @@ namespace solver { namespace ctint { // dca::phys::solver::ctint:: + +// I think the class hierarch isn't helpful here. class SolverConfiguration : public MatrixConfiguration { public: using BaseClass = MatrixConfiguration; diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp index 98a8fa2b2..469785075 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_base.hpp @@ -41,6 +41,8 @@ #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp" #endif +//#define DEBUG_SUBMATRIX + namespace dca { namespace phys { namespace solver { diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp index 80711ee45..f183c1620 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_cpu_submatrix.hpp @@ -66,12 +66,12 @@ class CtintWalkerSubmatrixCpu : public CtintWalkerSubmatrixBase& d_matrix_builder_; @@ -258,10 +258,6 @@ void CtintWalkerSubmatrixCpu::computeMInit() { d_matrix_builder_.computeG0(D_, configuration_.getSector(s), n_init_[s], n_max_[s], 0); -#ifdef DEBUG_SUBMATRIX - D_.print(); -#endif - std::array, 2> f_values; f_values[s].resize(n_init_[s]); for (int j = 0; j < n_init_[s]; ++j) { @@ -284,7 +280,6 @@ void CtintWalkerSubmatrixCpu::computeMInit() { using namespace dca::addt_str_oper; std::cout << "M_[" << s << "] size: " << M_[s].size() << '\n'; #endif - M_[s].resize(n_max_[s]); MatrixView M(M_[s], 0, 0, n_init_[s], n_init_[s]); @@ -301,6 +296,7 @@ void CtintWalkerSubmatrixCpu::computeMInit() { #ifdef DEBUG_SUBMATRIX D_M.print(); #endif + for (int i = 0; i < n_max_[s]; ++i) { for (int j = n_init_[s]; j < n_max_[s]; ++j) { M_[s](i, j) = 0; diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp index 17cb337c3..277115fb0 100644 --- 
a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_gpu_submatrix.hpp @@ -24,7 +24,6 @@ #include #include -#include "dca/linalg/util/gpu_event.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/structs/device_configuration_manager.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.hpp" @@ -91,16 +90,13 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixBase getRawG(); MatrixPair getRawM(); - void updateM() override; -private: - void doStep() override; - protected: using BaseClass::configuration_; using BaseClass::M_; @@ -140,6 +136,7 @@ class CtintWalkerSubmatrixGpu : public CtintWalkerSubmatrixBase, 2> source_list_dev_; MatrixPair M_dev_; + MatrixPair M_D_dev_; MatrixPair Gamma_inv_dev_; MatrixPair D_dev_; MatrixPair G_dev_; @@ -259,9 +256,10 @@ template void CtintWalkerSubmatrixGpu::computeMInit() { // Profiler profiler(__FUNCTION__, "CT-INT GPU walker", __LINE__, thread_id_); get_stream()->sync(); - for (int s = 0; s < 2; ++s) + for (int s = 0; s < 2; ++s) { M_dev_[s].resize(n_max_[s]); - + M_D_dev_[s].resize(n_max_[s]); + } for (int s = 0; s < 2; ++s) { const int delta = n_max_[s] - n_init_[s]; if (delta > 0) { @@ -320,10 +318,10 @@ template void CtintWalkerSubmatrixGpu::computeGInit() { // Profiler profiler(__FUNCTION__, "CT-INT GPU walker", __LINE__, thread_id_); get_stream()->sync(); + for (int s = 0; s < 2; ++s) { const int delta = n_max_[s] - n_init_[s]; - // In cpu we only do all this if delta > 0 auto& f_dev = f_dev_[s]; G_dev_[s].resizeNoCopy(n_max_[s]); @@ -477,6 +475,7 @@ CtintWalkerSubmatrixGpu::MatrixPair CtintWalkerSu template CtintWalkerSubmatrixGpu::MatrixPair CtintWalkerSubmatrixGpu< + Parameters, DIST>::getM() { std::array, 2> M; synchronize(); diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp index 1906f5a74..bd92c2001 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/ctint_walker_submatrix_base.hpp @@ -39,7 +39,8 @@ class CtintWalkerSubmatrixBase : public CtintWalkerBase { using typename BaseClass::Real; using typename BaseClass::Scalar; - CtintWalkerSubmatrixBase(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, int id = 0); + CtintWalkerSubmatrixBase(const Parameters& pars_ref, const Data& /*data*/, Rng& rng_ref, + int id = 0); virtual ~CtintWalkerSubmatrixBase() = default; @@ -50,7 +51,12 @@ class CtintWalkerSubmatrixBase : public CtintWalkerBase { using BaseClass::order; virtual void setMFromConfig() = 0; - auto getF() const { return f_; } + auto getF() const { + return f_; + } + + virtual void markThermalized() override; + protected: virtual void doStep() override; void doSteps(); @@ -72,8 +78,6 @@ class CtintWalkerSubmatrixBase : public CtintWalkerBase { */ void mainSubmatrixProcess(); - void markThermalized() override; - void transformM(); // For testing purposes. 
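The reshuffle above promotes markThermalized() to a public virtual override on the submatrix base class. The point of keeping it virtual is that the call may be made through a base-class pointer or reference, so the derived hook (which also recomputes the MC weight via setMFromConfig) must be reached by dynamic dispatch. A stripped-down sketch of that dispatch; the class names are placeholders, not the real walker hierarchy:

```cpp
#include <iostream>

struct WalkerBase {
  virtual ~WalkerBase() = default;
  virtual void markThermalized() { thermalized_ = true; }
  bool thermalized_ = false;
};

struct SubmatrixWalker : WalkerBase {
  // Override: besides flipping the flag, rebuild state that depends on it.
  void markThermalized() override {
    WalkerBase::markThermalized();
    recomputeWeight();
  }
  void recomputeWeight() { std::cout << "recompute M from configuration\n"; }
};

void warmUp(WalkerBase& walker) {
  // ... sweeps ...
  walker.markThermalized();  // reaches SubmatrixWalker::markThermalized via the vtable
}

int main() {
  SubmatrixWalker w;
  warmUp(w);
}
```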
@@ -84,7 +88,6 @@ class CtintWalkerSubmatrixBase : public CtintWalkerBase { virtual void computeMInit() = 0; private: - void doSubmatrixUpdate(); /** returns [acceptance_probability , mc_weight_ratio ] @@ -217,16 +220,15 @@ class CtintWalkerSubmatrixBase : public CtintWalkerBase { template CtintWalkerSubmatrixBase::CtintWalkerSubmatrixBase(const Parameters& parameters_ref, - const Data& /*data*/, - Rng& rng_ref, - int id) - : BaseClass(parameters_ref, rng_ref, id) { + const Data& /*data*/, + Rng& rng_ref, int id) + : BaseClass(parameters_ref, rng_ref, id) { if (BaseClass::concurrency_.id() == BaseClass::concurrency_.first() && thread_id_ == 0) std::cout << "\nCT-INT submatrix walker created." << std::endl; } template -void CtintWalkerSubmatrixBase::markThermalized() { +void CtintWalkerSubmatrixBase::markThermalized() { thermalized_ = true; nb_steps_per_sweep_ = std::max(1., std::ceil(sweeps_per_meas_ * partial_order_avg_.mean())); @@ -239,7 +241,7 @@ void CtintWalkerSubmatrixBase::markThermalized() { // Recompute the Monte Carlo weight. setMFromConfig(); #ifndef NDEBUG - //writeAlphas(); + // writeAlphas(); #endif } diff --git a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp index ecec506e0..40bdb92a4 100644 --- a/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp +++ b/include/dca/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.hpp @@ -83,6 +83,8 @@ class DMatrixBuilder { void computeG0(Matrix& G0, const Sector& configuration, const int n_init, const int n_max, const int which_section) const; + auto& getSiteDiff() { return site_diff_; } + #ifdef DCA_HAVE_GPU virtual void computeG0(linalg::Matrix& /*G0*/, const details::DeviceConfiguration& /*configuration*/, int /*n_init*/, diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh b/include/dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh index 56b2527df..9a47b7973 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cuh @@ -31,7 +31,7 @@ class ClusterHelper { public: /// Initialize real Cluster static void set(int nc, const int* add, int lda, const int* sub, int lds); - /// Initialize reciprocal cluster + /// Initialize reciprocal cluster static void setMomentum(int nc, const int* add, int lda, const int* sub, int lds); // Returns the index of id_1 + id_2. 
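The ClusterHelper touched above exposes precomputed addition/subtraction tables for cluster sites and momenta, which the GPU code uploads with cudaMemcpyToSymbol: the index of r_i + r_j (modulo the cluster) becomes a single lookup in an nc x nc table instead of coordinate arithmetic. A host-side sketch of that lookup idea on a tiny periodic 1D cluster; the table semantics chosen here are an assumption for illustration, not the exact DCA++ convention:

```cpp
#include <cassert>
#include <vector>

// Minimal analogue of ClusterHelper::set(): precompute add/sub tables for a
// periodic 1D cluster of nc sites, so id(r_i + r_j) is a single table lookup.
struct TinyClusterHelper {
  int nc;
  std::vector<int> add;  // add[i + nc * j] = index of r_i + r_j
  std::vector<int> sub;  // sub[i + nc * j] = index of r_j - r_i

  explicit TinyClusterHelper(int nc_) : nc(nc_), add(nc_ * nc_), sub(nc_ * nc_) {
    for (int j = 0; j < nc; ++j)
      for (int i = 0; i < nc; ++i) {
        add[i + nc * j] = (i + j) % nc;
        sub[i + nc * j] = ((j - i) % nc + nc) % nc;
      }
  }

  int addIndex(int i, int j) const { return add[i + nc * j]; }
  int subIndex(int i, int j) const { return sub[i + nc * j]; }
};

int main() {
  TinyClusterHelper helper(4);
  assert(helper.addIndex(3, 2) == 1);  // (3 + 2) mod 4
  assert(helper.subIndex(3, 2) == 3);  // (2 - 3) mod 4
}
```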
diff --git a/include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh b/include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh index 8f2b75355..bdc861b3b 100644 --- a/include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh +++ b/include/dca/phys/dca_step/cluster_solver/shared_tools/solver_helper.cuh @@ -44,7 +44,6 @@ public: private: static bool initialized_; - std::size_t subdm_step_[2]; }; diff --git a/include/dca/phys/parameters/mc_solver_parameters.hpp b/include/dca/phys/parameters/mc_solver_parameters.hpp index 595b946b5..c2440a50c 100644 --- a/include/dca/phys/parameters/mc_solver_parameters.hpp +++ b/include/dca/phys/parameters/mc_solver_parameters.hpp @@ -99,7 +99,7 @@ int McSolverParameters::getBufferSize(const Concurrency template void McSolverParameters::pack(const Concurrency& concurrency, char* buffer, - int buffer_size, int& position) const { + int buffer_size, int& position) const { concurrency.pack(buffer, buffer_size, position, expansion_parameter_K_); concurrency.pack(buffer, buffer_size, position, initial_configuration_size_); concurrency.pack(buffer, buffer_size, position, initial_matrix_size_); @@ -110,7 +110,7 @@ void McSolverParameters::pack(const Concurrency& concur template void McSolverParameters::unpack(const Concurrency& concurrency, char* buffer, - int buffer_size, int& position) { + int buffer_size, int& position) { concurrency.unpack(buffer, buffer_size, position, expansion_parameter_K_); concurrency.unpack(buffer, buffer_size, position, initial_configuration_size_); concurrency.unpack(buffer, buffer_size, position, initial_matrix_size_); @@ -207,16 +207,18 @@ int McSolverParameters::getBufferSize(const Concurre } template -void McSolverParameters::pack(const Concurrency& concurrency, char* buffer, - int buffer_size, int& position) const { +void McSolverParameters::pack(const Concurrency& concurrency, + char* buffer, int buffer_size, + int& position) const { concurrency.pack(buffer, buffer_size, position, self_energy_tail_cutoff_); concurrency.pack(buffer, buffer_size, position, steps_per_sweep_); concurrency.pack(buffer, buffer_size, position, shifts_per_sweep_); } template -void McSolverParameters::unpack(const Concurrency& concurrency, char* buffer, - int buffer_size, int& position) { +void McSolverParameters::unpack(const Concurrency& concurrency, + char* buffer, int buffer_size, + int& position) { concurrency.unpack(buffer, buffer_size, position, self_energy_tail_cutoff_); concurrency.unpack(buffer, buffer_size, position, steps_per_sweep_); concurrency.unpack(buffer, buffer_size, position, shifts_per_sweep_); @@ -252,6 +254,17 @@ void McSolverParameters::readWrite(ReaderOrWriter& r // Specialization for CT-INT template <> class McSolverParameters { +private: + /// code does not currently cover corner case of configuration size 0, as such this isn't a sensible default to use. 
+ int initial_configuration_size_ = 2; + double alpha_dd_pos_ = 0.501; + double alpha_dd_neg_ = 0; + double alpha_ndd_ = 1e-4; + bool adjust_alpha_dd_ = false; + double double_update_probability_ = 0; + bool all_sites_partnership_ = 0; + int max_submatrix_size_ = 1; + public: template int getBufferSize(const Concurrency& concurrency) const; @@ -298,16 +311,6 @@ class McSolverParameters { void setMaxSubmatrixSize(const int size) { max_submatrix_size_ = size; } - -private: - int initial_configuration_size_ = 0; - double alpha_dd_pos_ = 0.501; - double alpha_dd_neg_ = 0; - double alpha_ndd_ = 1e-4; - bool adjust_alpha_dd_ = false; - double double_update_probability_ = 0; - bool all_sites_partnership_ = 0; - int max_submatrix_size_ = 1; }; template @@ -327,7 +330,7 @@ int McSolverParameters::getBufferSize(const Concurrency template void McSolverParameters::pack(const Concurrency& concurrency, char* buffer, - const int buffer_size, int& position) const { + const int buffer_size, int& position) const { concurrency.pack(buffer, buffer_size, position, initial_configuration_size_); concurrency.pack(buffer, buffer_size, position, alpha_dd_pos_); concurrency.pack(buffer, buffer_size, position, alpha_dd_neg_); @@ -340,7 +343,7 @@ void McSolverParameters::pack(const Concurrency& concur template void McSolverParameters::unpack(const Concurrency& concurrency, char* buffer, - const int buffer_size, int& position) { + const int buffer_size, int& position) { concurrency.unpack(buffer, buffer_size, position, initial_configuration_size_); concurrency.unpack(buffer, buffer_size, position, alpha_dd_pos_); concurrency.unpack(buffer, buffer_size, position, alpha_dd_neg_); diff --git a/include/dca/phys/parameters/physics_parameters.hpp b/include/dca/phys/parameters/physics_parameters.hpp index 51f53cba4..12ff59554 100644 --- a/include/dca/phys/parameters/physics_parameters.hpp +++ b/include/dca/phys/parameters/physics_parameters.hpp @@ -57,6 +57,7 @@ class PhysicsParameters { return adjust_chemical_potential_; } + const void set_beta(double beta) { beta_ = beta; } private: double beta_; double density_; diff --git a/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.cpp b/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.cpp index c188ee348..14365bbba 100644 --- a/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.cpp +++ b/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder.cpp @@ -126,7 +126,7 @@ auto DMatrixBuilder::computeGamma(const int aux_spin_type, const in // Compute only the parts of G0 required at a given moment. (Re)Computing every element is not needed in most situations. 
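The pack/unpack methods reformatted above follow the usual MPI-style contiguous-buffer protocol: getBufferSize reserves space, pack appends each member while advancing a position cursor, and unpack reads the members back in the same order. A self-contained sketch of that round trip with a toy concurrency object; the names here are illustrative, not the DCA++ concurrency interface:

```cpp
#include <cassert>
#include <cstring>
#include <vector>

// Toy stand-in for the concurrency object's pack/unpack interface.
struct ToyConcurrency {
  template <typename T>
  int get_buffer_size(const T&) const { return sizeof(T); }

  template <typename T>
  void pack(char* buffer, int /*size*/, int& position, const T& value) const {
    std::memcpy(buffer + position, &value, sizeof(T));
    position += sizeof(T);
  }

  template <typename T>
  void unpack(const char* buffer, int /*size*/, int& position, T& value) const {
    std::memcpy(&value, buffer + position, sizeof(T));
    position += sizeof(T);
  }
};

struct ToySolverParameters {
  int initial_configuration_size = 2;
  double alpha_dd_pos = 0.501;

  int getBufferSize(const ToyConcurrency& c) const {
    return c.get_buffer_size(initial_configuration_size) + c.get_buffer_size(alpha_dd_pos);
  }
  void pack(const ToyConcurrency& c, char* buf, int size, int& pos) const {
    c.pack(buf, size, pos, initial_configuration_size);
    c.pack(buf, size, pos, alpha_dd_pos);  // order must match unpack()
  }
  void unpack(const ToyConcurrency& c, const char* buf, int size, int& pos) {
    c.unpack(buf, size, pos, initial_configuration_size);
    c.unpack(buf, size, pos, alpha_dd_pos);
  }
};

int main() {
  ToyConcurrency c;
  ToySolverParameters src;
  ToySolverParameters dst;
  dst.initial_configuration_size = 0;
  dst.alpha_dd_pos = 0.0;

  std::vector<char> buffer(src.getBufferSize(c));
  int pos = 0;
  src.pack(c, buffer.data(), static_cast<int>(buffer.size()), pos);

  pos = 0;
  dst.unpack(c, buffer.data(), static_cast<int>(buffer.size()), pos);
  assert(dst.initial_configuration_size == 2 && dst.alpha_dd_pos == 0.501);
}
```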
template -void DMatrixBuilder::computeG0(Matrix& G0, const Sector& configuration, const int n_init, + void DMatrixBuilder::computeG0(Matrix& G0, const Sector& configuration, const int n_init, const int n_max, const int which_section) const { int b_i, b_j, r_i, r_j; Real tau_i, tau_j; diff --git a/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.cpp b/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.cpp index 2b59fffa6..4f2b171f9 100644 --- a/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.cpp +++ b/src/phys/dca_step/cluster_solver/ctint/walker/tools/d_matrix_builder_gpu.cpp @@ -32,6 +32,8 @@ DMatrixBuilder::DMatrixBuilder(const G0Interpolation G0, const int b_j = config.getRightB(j); const auto tau_j = config.getTau(j); + const int label = dca::phys::solver::details::solver_helper.index(b_i, b_j, config.getLeftR(i), config.getRightR(j)); diff --git a/src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu b/src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu index f89b48cd2..9a27dac9d 100644 --- a/src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu +++ b/src/phys/dca_step/cluster_solver/ctint/walker/walker_kernels.cu @@ -34,6 +34,7 @@ using dca::util::castGPUType; template __global__ void setRightSectorToIdKernel(Scalar* m, const int ldm, const int n0, const int n_max) { const int i = threadIdx.x + blockDim.x * blockIdx.x; + // this preserves the behavior where we skip the lower left sector. const int j = threadIdx.y + blockDim.y * blockIdx.y + n0; if (i >= n_max || j >= n_max) diff --git a/src/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cu b/src/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cu index 62eb122be..072d1af19 100644 --- a/src/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cu +++ b/src/phys/dca_step/cluster_solver/shared_tools/cluster_helper.cu @@ -85,11 +85,14 @@ void ClusterHelper::setMomentum(int nc, const int* add, int lda, const int* sub, size_t cluster_helper_size; - checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_add_matrix), &host_helper.add_matrix_, sizeof(int*))); - checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_sub_matrix), &host_helper.sub_matrix_, sizeof(int*))); + checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_add_matrix), &host_helper.add_matrix_, + sizeof(int*))); + checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_sub_matrix), &host_helper.sub_matrix_, + sizeof(int*))); checkRC(cudaGetSymbolSize(&cluster_helper_size, HIP_SYMBOL(cluster_momentum_helper))); assert(cluster_helper_size == sizeof(ClusterHelper)); - checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_helper), &host_helper, sizeof(ClusterHelper))); + checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_momentum_helper), &host_helper, + sizeof(ClusterHelper))); checkRC(cudaDeviceSynchronize()); #ifdef DEBUG_CLUSTER_HELPER checkClusterMomentumHelper<<<1, 1, 0, 0>>>(nc, lds); @@ -127,8 +130,10 @@ void ClusterHelper::set(int nc, const int* add, int lda, const int* sub, int lds checkRC(cudaGetSymbolSize(&cluster_helper_size, HIP_SYMBOL(cluster_real_helper))); assert(cluster_helper_size == sizeof(ClusterHelper)); - checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_add_matrix), &host_helper.add_matrix_, sizeof(int*))); - checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_sub_matrix), &host_helper.sub_matrix_, sizeof(int*))); + checkRC( + cudaMemcpyToSymbol(HIP_SYMBOL(cluster_add_matrix), &host_helper.add_matrix_, sizeof(int*))); + checkRC( + 
cudaMemcpyToSymbol(HIP_SYMBOL(cluster_sub_matrix), &host_helper.sub_matrix_, sizeof(int*))); checkRC(cudaMemcpyToSymbol(HIP_SYMBOL(cluster_real_helper), &host_helper, cluster_helper_size)); checkRC(cudaDeviceSynchronize()); diff --git a/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp b/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp index dd218f5f2..33104e949 100644 --- a/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp +++ b/test/integration/cluster_solver/ctint/ctint_double_update_comparison_test.cpp @@ -46,8 +46,7 @@ using McOptions = MockMcOptions; #include "dca/profiling/null_profiler.hpp" #include "dca/util/git_version.hpp" #include "dca/util/modules.hpp" - -const std::string input_dir = DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/"; +#include "test/unit/phys/dca_step/cluster_solver/test_setup.hpp" template void initializeWalkerStatic(const G0& g0, const Parameters& parameters, const Data& data) { @@ -56,7 +55,23 @@ void initializeWalkerStatic(const G0& g0, const Parameters& parameters, const Da WalkerType::setInteractionVertices(data, parameters); } -TEST(CtintDoubleUpdateComparisonTest, Self_Energy) { +constexpr char input_name[] = + DCA_SOURCE_DIR "/test/integration/cluster_solver/ctint/double_insertion_comparison_input.json"; + +struct CtintDoubleUpdateComparisonTest : public ::testing::Test { + using G0Setup = dca::testing::G0SetupBare; + virtual void SetUp() { + host_setup.SetUp(); + host2_setup.SetUp(); + } + + virtual void TearDown() {} + G0Setup host_setup; + G0Setup host2_setup; +}; + +TEST_F(CtintDoubleUpdateComparisonTest, Self_Energy) { using RngType = dca::testing::StubRng; using RealRng = dca::math::random::StdRandomWrapper; using Lattice = dca::phys::models::FeAsLattice; @@ -65,44 +80,45 @@ TEST(CtintDoubleUpdateComparisonTest, Self_Energy) { using Concurrency = dca::parallel::NoConcurrency; using Parameters = dca::phys::params::Parameters, Scalar>>; + RngType, dca::ClusterSolverId::CT_INT, + dca::NumericalTraits, Scalar>>; using Data = dca::phys::DcaData; using Walker = testing::phys::solver::ctint::WalkerWrapper; using dca::DistType; - using WalkerSubmatrix = - testing::phys::solver::ctint::WalkerWrapperSubmatrix; + using WalkerSubmatrix = testing::phys::solver::ctint::WalkerWrapperSubmatrix; + + auto& cpu_data = host_setup.data_; + auto& cpu2_data = host2_setup.data_; + auto& cpu_parameters = host_setup.parameters_; + auto& cpu2_parameters = host2_setup.parameters_; Concurrency concurrency(0, nullptr); dca::util::GitVersion::print(); dca::util::Modules::print(); - Parameters parameters(dca::util::GitVersion::string(), concurrency); - parameters.read_input_and_broadcast( - input_dir + "/double_insertion_comparison_input.json"); - parameters.update_model(); - parameters.update_domains(); - - Data data(parameters); - data.initialize(); - - dca::phys::solver::G0Interpolation g0( - dca::phys::solver::ctint::details::shrinkG0(data.G0_r_t)); + dca::phys::solver::G0Interpolation g0_cpu( + dca::phys::solver::ctint::details::shrinkG0(cpu_data->G0_r_t)); + dca::phys::solver::G0Interpolation g0_cpu2( + dca::phys::solver::ctint::details::shrinkG0(cpu2_data->G0_r_t)); RealRng rng(0, 1); std::vector rng_vals(10000); for (auto& x : rng_vals) x = rng(); - RngType rng1(rng_vals), rng2(rng_vals); + RngType rng1(rng_vals); + RngType rng2(rng_vals); using RDmn = typename Parameters::RClusterDmn; - - dca::phys::solver::ctint::DMatrixBuilder d_matrix_builder_(g0, 
Parameters::lattice_type::BANDS, RDmn()); - Walker walker1(parameters, rng1, d_matrix_builder_); - parameters.setMaxSubmatrixSize(16); - dca::phys::solver::ctint::DMatrixBuilder d_matrix_builder_2(g0, Parameters::lattice_type::BANDS, RDmn()); - WalkerSubmatrix walker2(parameters, rng2, d_matrix_builder_2); + dca::phys::solver::ctint::DMatrixBuilder d_matrix_builder_( + g0_cpu, Parameters::lattice_type::BANDS, RDmn()); + Walker walker1(cpu_parameters, *cpu_data, rng1, d_matrix_builder_); + + cpu2_parameters.setMaxSubmatrixSize(16); + dca::phys::solver::ctint::DMatrixBuilder d_matrix_builder_2( + g0_cpu2, Parameters::lattice_type::BANDS, RDmn()); + WalkerSubmatrix walker2(cpu2_parameters, *cpu2_data, rng2, d_matrix_builder_2); EXPECT_NEAR(walker1.get_MC_log_weight(), walker2.get_MC_log_weight(), 5e-7); @@ -116,7 +132,7 @@ TEST(CtintDoubleUpdateComparisonTest, Self_Energy) { EXPECT_NEAR(walker1.get_MC_log_weight(), walker2.get_MC_log_weight(), 5e-7); EXPECT_EQ(walker1.get_sign(), walker2.get_sign()); - auto check_direct_weight = [] (auto& walker) { + auto check_direct_weight = [](auto& walker) { const auto fast_weight = walker.get_MC_log_weight(); walker.setMFromConfig(); const auto direct_weight = walker.get_MC_log_weight(); diff --git a/test/unit/linalg/matrixop_cpu_gpu_test.cpp b/test/unit/linalg/matrixop_cpu_gpu_test.cpp index 8a1324fce..a96256f3d 100644 --- a/test/unit/linalg/matrixop_cpu_gpu_test.cpp +++ b/test/unit/linalg/matrixop_cpu_gpu_test.cpp @@ -39,7 +39,6 @@ TEST(MatrixopCPUGPUTest, difference) { dca::linalg::Matrix> b(a); b(ia, ja) += sg * diff; double err = std::abs(epsilon * b(ia, ja)); - EXPECT_NEAR(diff, dca::linalg::matrixop::difference(da, b, 2 * diff), err); EXPECT_NEAR(diff, dca::linalg::matrixop::difference(da, b, diff + err), err); auto diffcalc = dca::linalg::matrixop::difference(da, b, 2 * diff); diff --git a/test/unit/linalg/util/CMakeLists.txt b/test/unit/linalg/util/CMakeLists.txt index 4eef721ed..8a427007c 100644 --- a/test/unit/linalg/util/CMakeLists.txt +++ b/test/unit/linalg/util/CMakeLists.txt @@ -29,7 +29,7 @@ if(DCA_HAVE_CUDA) dca_add_gtest(complex_op_cuda_test GTEST_MAIN CUDA - LIBS ${DCA_GPU_LIBS} magma::magma BLAS::BLAS) + LIBS ${DCA_GPU_LIBS} magma::magma) dca_gpu_runtime_link(complex_op_cuda_test) endif() diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp index 0edaad83c..cb7f61571 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_gpu_test.cpp @@ -30,8 +30,8 @@ using McOptions = MockMcOptions; #include "walker_wrapper_submatrix.hpp" #include "dca/linalg/matrixop.hpp" #include "dca/phys/dca_step/cluster_solver/ctint/details/solver_methods.hpp" - #include "dca/util/to_string.hpp" + using namespace dca::addt_str_oper; constexpr char input_name[] = @@ -62,12 +62,12 @@ using namespace dca::phys::solver; template using CtintWalkerSubmatrixGpuTest = CtINTWalkerSubmatrixGPUTestT; -template +template using DMatrixBuilder = dca::phys::solver::ctint::DMatrixBuilder; // Currently testing float isn't really possible due to the way the Scalar type is // carried through from mc_options. 
See test_setup.hpp PD -using ScalarTypes = ::testing::Types; //double, +using ScalarTypes = ::testing::Types; // double, TYPED_TEST_CASE(CtintWalkerSubmatrixGpuTest, ScalarTypes); // Compare the submatrix update with a direct computation of the M matrix, and compare the @@ -80,9 +80,9 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { using Matrix = typename Walker::Matrix; using MatrixPair = std::array; using SbmWalkerCpu = - testing::phys::solver::ctint::WalkerWrapperSubmatrix; + testing::phys::solver::ctint::WalkerWrapperSubmatrix; using SbmWalkerGpu = - testing::phys::solver::ctint::WalkerWrapperSubmatrix; + testing::phys::solver::ctint::WalkerWrapperSubmatrix; std::vector setup_rngs{0., 0.00, 0.9, 0.5, 0.01, 0, 0.75, 0.02, 0, 0.6, 0.03, 1, 0.99, 0.04, 0.99}; @@ -100,15 +100,14 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { G0Interpolation g0_gpu(g0_func_gpu); typename CtintWalkerSubmatrixGpuTest::G0Setup::LabelDomain label_dmn; - constexpr int bands = dca::testing::LatticeBilayer::BANDS; DMatrixBuilder d_matrix_cpu(g0_cpu, bands, RDmn()); + d_matrix_cpu.setAlphas(cpu_parameters.getAlphas(), false); // cpu_parameters.adjustAlphaDd()); SbmWalkerCpu::setInteractionVertices(cpu_data, cpu_parameters); - d_matrix_cpu.setAlphas(cpu_parameters.getAlphas(), false); //cpu_parameters.adjustAlphaDd()); DMatrixBuilder d_matrix_gpu(g0_gpu, bands, RDmn()); + d_matrix_gpu.setAlphas(gpu_parameters.getAlphas(), false); // gpu_parameters.adjustAlphaDd()); SbmWalkerGpu::setInteractionVertices(cpu_data, gpu_parameters); - d_matrix_gpu.setAlphas(gpu_parameters.getAlphas(), false); //gpu_parameters.adjustAlphaDd()); // ************************************ // Test vertex insertion / removal **** @@ -162,21 +161,27 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { SbmWalkerGpu walker_gpu(gpu_parameters, gpu_rng, d_matrix_gpu); walker_gpu.setInteractionVertices(gpu_data, gpu_parameters); + // I don't think we can call these before steps are done. At least the CPU implementation has nan's in M + // MatrixPair old_M_cpu(walker_cpu.getM()); + // MatrixPair old_M_gpu(walker_gpu.getM()); + + // doSweep does this + walker_gpu.uploadConfiguration(); + + constexpr Scalar tolerance = std::numeric_limits::epsilon() * 100; + + // for (int s = 0; s < 2; ++s) + // EXPECT_TRUE(dca::linalg::matrixop::areNear(old_M_cpu[s], old_M_gpu[s], tolerance)); + cpu_rng.setNewValues(rng_vals); walker_cpu.doStep(steps); + gpu_rng.setNewValues(rng_vals); walker_gpu.doStep(steps); // doSweep does this walker_gpu.uploadConfiguration(); - constexpr Scalar tolerance = std::numeric_limits::epsilon() * 100; - - auto M_cpu = walker_cpu.getM(); - auto M_gpu = walker_gpu.getM(); - for (int s = 0; s < 2; ++s) - EXPECT_TRUE(dca::linalg::matrixop::areNear(M_cpu[s], M_gpu[s], tolerance)); - // The final configuration is the same. 
const auto& config1 = walker_cpu.getWalkerConfiguration(); const auto& config2 = walker_gpu.getWalkerConfiguration(); @@ -185,6 +190,9 @@ TYPED_TEST(CtintWalkerSubmatrixGpuTest, doSteps) { EXPECT_EQ(config1[i], config2[i]); EXPECT_EQ(walker_cpu.get_sign(), walker_gpu.get_sign()); + auto M_cpu = walker_cpu.getM(); + auto M_gpu = walker_gpu.getM(); + auto fail = compareSubMatrix(M_cpu, M_gpu, "M"); EXPECT_FALSE(fail); } diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp index a768310f4..9a39bdaeb 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/ct_int_walker_submatrix_test.cpp @@ -34,17 +34,17 @@ constexpr char input_name[] = template using CtintWalkerSubmatrixTest = - typename dca::testing::G0Setup; - - using CDA = dca::phys::ClusterDomainAliases; - using RDmn = typename CDA::RClusterDmn; + typename dca::testing::G0Setup; +using CDA = dca::phys::ClusterDomainAliases; +using RDmn = typename CDA::RClusterDmn; using namespace dca::phys::solver; // Currently testing float isn't really possible due to the way the Scalar type is // carried through from mc_options. See test_setup.hpp PD -using ScalarTypes = ::testing::Types; //double, +using ScalarTypes = ::testing::Types; // double, TYPED_TEST_CASE(CtintWalkerSubmatrixTest, ScalarTypes); // Compare the submatrix update with a direct computation of the M matrix, and compare the @@ -58,7 +58,7 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { using Matrix = typename Walker::Matrix; using MatrixPair = std::array; using SubmatrixWalker = - testing::phys::solver::ctint::WalkerWrapperSubmatrix; + testing::phys::solver::ctint::WalkerWrapperSubmatrix; std::vector setup_rngs{0., 0.00, 0.9, 0.5, 0.01, 0, 0.75, 0.02, 0, 0.6, 0.03, 1, 0.99, 0.04, 0.99}; @@ -117,7 +117,7 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { const auto tolerance = 1000.0 * std::numeric_limits>::epsilon(); for (int s = 0; s < 2; ++s) - EXPECT_TRUE(dca::linalg::matrixop::areNear(direct_M[s], new_M[s], tolerance)); + EXPECT_TRUE(dca::linalg::matrixop::areNear(direct_M[s], new_M[s], tolerance)); // Compare with non submatrix walker. 
rng.setNewValues(setup_rngs); @@ -128,8 +128,8 @@ TYPED_TEST(CtintWalkerSubmatrixTest, doSteps) { walker_nosub.doStep(); // this needs to be std::abs because it could be a "complex" probability - EXPECT_NEAR(std::abs(walker.getAcceptanceProbability()), std::abs(walker_nosub.getAcceptanceProbability()), - tolerance); + EXPECT_NEAR(std::abs(walker.getAcceptanceProbability()), + std::abs(walker_nosub.getAcceptanceProbability()), tolerance); auto config_nosubm = walker_nosub.getWalkerConfiguration(); ASSERT_EQ(config.size(), config_nosubm.size()); diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/submatrix_input.json b/test/unit/phys/dca_step/cluster_solver/ctint/walker/submatrix_input.json index a6a5a4b05..88a9aeed0 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/submatrix_input.json +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/submatrix_input.json @@ -9,16 +9,18 @@ "physics" : { - "beta" : 2, + "beta" : 4, "chemical-potential" : 0 }, "bilayer-Hubbard-model": { - "t" : 0, - "U" : 0, - "V" : 0, - "V-prime" : 2 + "t" : 1.0, + "t_prime" : 0.1, + "t_perp" : 0.01, + "U" : 1.0, + "V" : 1.0, + "V-prime" : 2.0 }, "Hund-model": diff --git a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp index 638a35ac6..6393815cd 100644 --- a/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp +++ b/test/unit/phys/dca_step/cluster_solver/ctint/walker/walker_wrapper_submatrix.hpp @@ -50,7 +50,7 @@ struct WalkerSelector { #endif // DCA_HAVE_GPU using namespace dca::phys::solver::ctint; - template +template struct WalkerWrapperSubmatrix : public WalkerSelector::type { using BaseClass = typename WalkerSelector::type; using Scalar = SCALAR; @@ -59,26 +59,26 @@ struct WalkerWrapperSubmatrix : public WalkerSelector& d_matrix_builder) - : BaseClass(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, d_matrix_builder, 0), + WalkerWrapperSubmatrix(/*const*/ Parameters& parameters_ref, Rng& rng_ref, + DMatrixBuilder& d_matrix_builder) + : BaseClass(parameters_ref, dca::phys::DcaData(parameters_ref), rng_ref, + d_matrix_builder, 0), streams_(3) { BaseClass::initialize(0); + } + WalkerWrapperSubmatrix(/*const*/ Parameters& parameters_ref, + const dca::phys::DcaData& data, Rng& rng_ref, + DMatrixBuilder& d_matrix_builder) + : BaseClass(parameters_ref, data, rng_ref, d_matrix_builder, 0), streams_(3) { + BaseClass::initialize(0); } using Matrix = dca::linalg::Matrix; using MatrixPair = std::array; MatrixPair getM() { - std::array, 2> M; - - BaseClass::computeM(M); -#ifdef DCA_HAVE_GPU - checkRC(cudaDeviceSynchronize()); -#endif - - std::array, 2> M_copy{M[0], M[1]}; - return M_copy; + return BaseClass::getM(); } const auto& getWalkerConfiguration() const { diff --git a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_4x4.json b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_4x4.json index 2d27da0f8..6b997974b 100644 --- a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_4x4.json +++ b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/input_4x4.json @@ -9,14 +9,18 @@ "physics" : { - "beta" : 2, + "beta" : 4, "chemical-potential" : 0 }, "bilayer-Hubbard-model": { - "t" : 1, - "U" : 2 + "t" : 1.0, + "t_prime" : 0.1, + "t_perp" : 0.01, + "U" : 1.0, + "V" : 1.0, + "V-prime" : 2.0 }, "Hund-model": diff --git 
a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test.cpp b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test.cpp index 53a198222..2521ff7f3 100644 --- a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test.cpp +++ b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test.cpp @@ -46,10 +46,8 @@ using ConfigGenerator = dca::testing::AccumulationTest; using Configuration = ConfigGenerator::Configuration; using Sample = ConfigGenerator::Sample; -using Scalar = double; - using TpAccumulatorSinglebandTest = - dca::testing::G0Setup; + dca::testing::G0Setup; TEST_F(TpAccumulatorSinglebandTest, Accumulate) { const std::array n{17, 17}; @@ -77,9 +75,10 @@ TEST_F(TpAccumulatorSinglebandTest, Accumulate) { func_names[dca::phys::FourPointType::PARTICLE_HOLE_CHARGE] = "G4_ph_charge"; func_names[dca::phys::FourPointType::PARTICLE_PARTICLE_UP_DOWN] = "G4_pp_up_down"; - for (const dca::phys::FourPointType type : - {dca::phys::FourPointType::PARTICLE_HOLE_TRANSVERSE, dca::phys::FourPointType::PARTICLE_HOLE_MAGNETIC, - dca::phys::FourPointType::PARTICLE_HOLE_CHARGE, dca::phys::FourPointType::PARTICLE_PARTICLE_UP_DOWN}) { + for (const dca::phys::FourPointType type : {dca::phys::FourPointType::PARTICLE_HOLE_TRANSVERSE, + dca::phys::FourPointType::PARTICLE_HOLE_MAGNETIC, + dca::phys::FourPointType::PARTICLE_HOLE_CHARGE, + dca::phys::FourPointType::PARTICLE_PARTICLE_UP_DOWN}) { parameters_.set_four_point_channel(type); dca::phys::solver::accumulator::TpAccumulator accumulator( diff --git a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test_baseline.hdf5 b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test_baseline.hdf5 index dca3ef510..d3618ad70 100644 Binary files a/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test_baseline.hdf5 and b/test/unit/phys/dca_step/cluster_solver/shared_tools/accumulation/tp/tp_accumulator_singleband_test_baseline.hdf5 differ diff --git a/test/unit/phys/dca_step/cluster_solver/test_setup.hpp b/test/unit/phys/dca_step/cluster_solver/test_setup.hpp index b37cc0837..a86fad3e8 100644 --- a/test/unit/phys/dca_step/cluster_solver/test_setup.hpp +++ b/test/unit/phys/dca_step/cluster_solver/test_setup.hpp @@ -30,6 +30,7 @@ #include "dca/phys/models/analytic_hamiltonians/hund_lattice.hpp" #include "dca/phys/models/analytic_hamiltonians/rashba_hubbard.hpp" #include "dca/phys/models/analytic_hamiltonians/Moire_Hubbard.hpp" +#include "dca/phys/models/analytic_hamiltonians/fe_as_lattice.hpp" #include "dca/parallel/no_concurrency/no_concurrency.hpp" #include "dca/phys/models/analytic_hamiltonians/Kagome_hubbard.hpp" #include "dca/parallel/no_threading/no_threading.hpp" @@ -50,6 +51,7 @@ using LatticeHund = phys::models::HundLattice; using LatticeKagome = phys::models::KagomeHubbard>; using LatticeRashba = phys::models::RashbaHubbard>; using LatticeMoireHubbard = phys::models::moire_hubbard>; +using LatticeFeAs = phys::models::FeAsLattice; template @@ -196,10 +195,8 @@ struct G0SetupFromParam { } void TearDown() {} - }; - } // namespace testing } // namespace dca
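Several of the updated walker tests compare the CPU and GPU M matrices with a tolerance scaled from machine epsilon (e.g. std::numeric_limits<Scalar>::epsilon() * 100 passed to matrixop::areNear or compareSubMatrix). A minimal stand-alone version of that kind of elementwise check, independent of the DCA++ matrix types and assuming an absolute tolerance:

```cpp
#include <cassert>
#include <cmath>
#include <limits>
#include <vector>

// Returns true if every entry of a and b agrees to within `tol` (absolute).
// Plays the role of dca::linalg::matrixop::areNear in the tests above.
template <typename Scalar>
bool areNear(const std::vector<std::vector<Scalar>>& a,
             const std::vector<std::vector<Scalar>>& b, Scalar tol) {
  if (a.size() != b.size())
    return false;
  for (std::size_t i = 0; i < a.size(); ++i) {
    if (a[i].size() != b[i].size())
      return false;
    for (std::size_t j = 0; j < a[i].size(); ++j)
      if (std::abs(a[i][j] - b[i][j]) > tol)
        return false;
  }
  return true;
}

int main() {
  using Scalar = double;
  const Scalar tol = std::numeric_limits<Scalar>::epsilon() * 100;

  std::vector<std::vector<Scalar>> m_cpu{{1.0, 2.0}, {3.0, 4.0}};
  auto m_gpu = m_cpu;
  m_gpu[1][1] += std::numeric_limits<Scalar>::epsilon();  // tiny rounding difference
  assert(areNear(m_cpu, m_gpu, tol));

  m_gpu[0][0] += 1e-6;  // well above the tolerance
  assert(!areNear(m_cpu, m_gpu, tol));
}
```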