2 changes: 2 additions & 0 deletions build-aux/frontier_build.sh
@@ -10,11 +10,13 @@ cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \
-DTEST_RUNNER="srun" \
-DGPU_TARGETS=gfx90a \
-DAMDGPU_TARGETS=gfx90a \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_CXX_COMPILER=mpic++ \
-DCMAKE_HIP_COMPILER=/opt/rocm-6.3.1/llvm/bin/clang++ \
-DCMAKE_INSTALL_PREFIX=$INST \
-DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}" \
-DCMAKE_HIP_LINK_FLAGS=--hip-link \
-DCMAKE_EXPORT_COMPILE_COMMANDS=1 \
-GNinja \
..
6 changes: 6 additions & 0 deletions include/dca/linalg/blas/use_device.hpp
@@ -85,24 +85,28 @@ struct UseDevice<GPU> {
template <typename ScalarType>
inline static void axpy(int n, ScalarType alpha, const ScalarType* x, int incx, ScalarType* y,
int incy, int thread_id, int stream_id) {
assert(stream_id == 0);
cublas::axpy(util::getHandle(thread_id, stream_id), n, alpha, x, incx, y, incy);
}

template <typename ScalarType>
inline static void copy(int n, const ScalarType* x, int incx, ScalarType* y, int incy,
int thread_id, int stream_id) {
assert(stream_id == 0);
cublas::copy(util::getHandle(thread_id, stream_id), n, x, incx, y, incy);
}

template <typename ScalarType>
inline static void scal(int n, ScalarType alpha, ScalarType* x, int incx, int thread_id,
int stream_id) {
assert(stream_id == 0);
cublas::scal(util::getHandle(thread_id, stream_id), n, alpha, x, incx);
}

template <typename ScalarType>
inline static void swap(int n, ScalarType* x, int incx, ScalarType* y, int incy, int thread_id,
int stream_id) {
assert(stream_id == 0);
cublas::swap(util::getHandle(thread_id, stream_id), n, x, incx, y, incy);
}

@@ -112,6 +116,7 @@ struct UseDevice<GPU> {
ScalarType alpha, const ScalarType* a, int lda, const ScalarType* b,
int ldb, ScalarType beta, ScalarType* c, int ldc, int thread_id,
int stream_id) {
assert(stream_id == 0);
cublas::gemm(util::getHandle(thread_id, stream_id), transa, transb, m, n, k, alpha, a, lda, b,
ldb, beta, c, ldc);
}
@@ -120,6 +125,7 @@ struct UseDevice<GPU> {
inline static void trsm(const char* side, const char* uplo, const char* transa, const char* diag,
int m, int n, ScalarType alpha, const ScalarType* a, int lda,
ScalarType* b, int ldb, int thread_id, int stream_id) {
assert(stream_id == 0);
cublas::trsm(util::getHandle(thread_id, stream_id), side, uplo, transa, diag, m, n, alpha, a,
lda, b, ldb);
}
2 changes: 2 additions & 0 deletions include/dca/linalg/matrixop.hpp
@@ -679,6 +679,7 @@ template <typename Scalar, DeviceType device_name, template <typename, DeviceTyp
void gemm(char transa, char transb, Scalar alpha, const MatrixA<Scalar, device_name, ALLOC1>& a,
const MatrixB<Scalar, device_name, ALLOC2>& b, Scalar beta,
MatrixC<Scalar, device_name, ALLOC3>& c, int thread_id = 0, int stream_id = 0) {
assert(stream_id == 0);
int m = c.nrRows();
int n = c.nrCols();
int k;
@@ -719,6 +720,7 @@ template <typename Scalar, DeviceType device_name, class ALLOC,
inline void gemm(const MatrixA<Scalar, device_name, ALLOC>& a,
const MatrixB<Scalar, device_name, ALLOC>& b,
MatrixC<Scalar, device_name, ALLOC>& c, int thread_id = 0, int stream_id = 0) {
assert(stream_id == 0);
gemm<Scalar, device_name>('N', 'N', 1., a, b, 0., c, thread_id, stream_id);
}

2 changes: 1 addition & 1 deletion include/dca/linalg/util/stream_container.hpp
@@ -61,7 +61,7 @@ class StreamContainer {
}

private:
constexpr static std::size_t streams_per_thread_ = 2;
constexpr static std::size_t streams_per_thread_ = 1;
std::vector<std::array<GpuStream, streams_per_thread_>> streams_;
};

@@ -101,8 +101,6 @@ class G0Interpolation<dca::linalg::GPU, Parameters> : public G0InterpolationBase
linalg::MultiVector<linalg::GPU, int, int, Real> g0_labels_gpu_;

using Base::beta;

linalg::util::GpuEvent config_copied_;
};

template <typename Parameters>
@@ -239,7 +237,7 @@ void G0Interpolation<dca::linalg::GPU, Parameters>::uploadConfiguration(
const Configuration& configuration) {
const int configuration_size = configuration.size();

config_copied_.block();
linalg::util::getStream(thread_id, 0).sync();
g0_labels_cpu_.resizeNoCopy(configuration_size);
g0_labels_gpu_.resizeNoCopy(configuration_size);

@@ -255,7 +253,6 @@ void G0Interpolation<dca::linalg::GPU, Parameters>::uploadConfiguration(

const auto& stream = linalg::util::getStream(thread_id, stream_id);
g0_labels_gpu_.setAsync(g0_labels_cpu_, stream);
config_copied_.record(stream);
}

} // namespace ctaux
@@ -50,7 +50,7 @@ class DeviceConfigurationManager {
void DeviceConfigurationManager::upload(const SolverConfiguration& config, int thread_id, int spin) {
assert(spin >= 0 and spin < 2);
const auto& entries = config.getSector(spin).entries_;
device_entries_[spin].setAsync(entries, linalg::util::getStream(thread_id, spin));
device_entries_[spin].setAsync(entries, linalg::util::getStream(thread_id, 0));
device_pointers_[spin].data = device_entries_[spin].ptr();
}

@@ -91,7 +91,7 @@ class InteractionVertices {
enum PartnershipType { NONE, SAME_SITE, ALL_SITES };

std::vector<double> cumulative_weigths_;
double total_weigth_ = 0;
double total_weigth_{0};
bool interband_propagator_ = false;
PartnershipType partnership_type_ = NONE;
};
@@ -92,10 +92,10 @@ class CtintWalkerBase {
virtual void doStep() = 0;

virtual void doSweep() = 0;
template<linalg::DeviceType DEVICE>

template <linalg::DeviceType DEVICE>
void setMFromConfigImpl(DMatrixBuilder<DEVICE, Scalar>& d_matrix_builder);

bool is_thermalized() const {
return thermalized_;
}
@@ -165,17 +165,15 @@ class CtintWalkerBase {
return flop;
}

const auto& get_stream(int s) const {
assert(s >= 0 && s < 2);
return *streams_[s];
const auto& get_stream() const {
return stream_;
}

static void sumConcurrency(const Concurrency&) {}

void writeAlphas() const;

static void setInteractionVertices(const Data& data,
const Parameters& parameters);
static void setInteractionVertices(const Data& data, const Parameters& parameters);

protected:
// typedefs
@@ -191,7 +189,7 @@
const Concurrency& concurrency_;

const int thread_id_;
std::array<linalg::util::GpuStream*, 2> streams_;
linalg::util::GpuStream* stream_;

Rng& rng_;
SolverConfiguration configuration_;
@@ -230,16 +228,12 @@
};

template <class Parameters, DistType DIST>
CtintWalkerBase<Parameters, DIST>::CtintWalkerBase(const Parameters& parameters_ref,
Rng& rng_ref, int id)
CtintWalkerBase<Parameters, DIST>::CtintWalkerBase(const Parameters& parameters_ref, Rng& rng_ref,
int id)
: parameters_(parameters_ref),
concurrency_(parameters_.get_concurrency()),

thread_id_(id),

streams_{&linalg::util::getStreamContainer()(thread_id_, 0),
&linalg::util::getStreamContainer()(thread_id_, 1)},

stream_{&linalg::util::getStreamContainer()(thread_id_, 0)},
rng_(rng_ref),

configuration_(parameters_.get_beta(), Bdmn::dmn_size(), vertices_,
@@ -249,7 +243,7 @@ CtintWalkerBase<Parameters, DIST>::CtintWalkerBase(const Parameters& parameters_
total_interaction_(vertices_.integratedInteraction()) {}

template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::initialize(int iteration) {
void CtintWalkerBase<Parameters, DIST>::initialize(int iteration) {
assert(total_interaction_);
phase_.reset();

@@ -269,7 +263,7 @@ void CtintWalkerBase<Parameters,DIST>::initialize(int iteration) {
}

template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::updateSweepAverages() {
void CtintWalkerBase<Parameters, DIST>::updateSweepAverages() {
order_avg_.addSample(order());
sign_avg_.addSample(phase_.getSign());
// Track avg order for the final number of steps / sweep.
@@ -293,9 +287,8 @@ void CtintWalkerBase<Parameters,DIST>::updateSweepAverages() {
// }
// }


template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::updateShell(int meas_id, int meas_to_do) const {
void CtintWalkerBase<Parameters, DIST>::updateShell(int meas_id, int meas_to_do) const {
if (concurrency_.id() == concurrency_.first() && meas_id > 1 &&
(meas_id % dca::util::ceilDiv(meas_to_do, 20)) == 0) {
std::cout << "\t\t\t" << int(double(meas_id) / double(meas_to_do) * 100) << " % completed \t ";
@@ -312,7 +305,7 @@ void CtintWalkerBase<Parameters,DIST>::updateShell(int meas_id, int meas_to_do)
}

template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::printSummary() const {
void CtintWalkerBase<Parameters, DIST>::printSummary() const {
std::cout << "\n"
<< "Walker: process ID = " << concurrency_.id() << ", thread ID = " << thread_id_ << "\n"
<< "-------------------------------------------\n";
@@ -329,8 +322,8 @@ void CtintWalkerBase<Parameters,DIST>::printSummary() const {
}

template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::setInteractionVertices(const Data& data,
const Parameters& parameters) {
void CtintWalkerBase<Parameters, DIST>::setInteractionVertices(const Data& data,
const Parameters& parameters) {
vertices_.reset();
vertices_.initialize(parameters.getDoubleUpdateProbability(), parameters.getAllSitesPartnership());
vertices_.initializeFromHamiltonian(data.H_interactions);
@@ -341,11 +334,11 @@ void CtintWalkerBase<Parameters,DIST>::setInteractionVertices(const Data& data,
}

template <class Parameters, DistType DIST>
void CtintWalkerBase<Parameters,DIST>::computeM(MatrixPair& m_accum) const {
void CtintWalkerBase<Parameters, DIST>::computeM(MatrixPair& m_accum) const {
m_accum = M_;
}

// template<class WALKER, linalg::DeviceType DEVICE>
// template<class WALKER, linalg::DeviceType DEVICE>
// void setMFromConfigHelper(WALKER& walker, DMatrixBuilder<DEVICE, Scalar>& d_matrix_builder) {
// walker.mc_log_weight_ = 0.;
// walker.phase_.reset();
@@ -379,11 +372,11 @@ void CtintWalkerBase<Parameters,DIST>::computeM(MatrixPair& m_accum) const {
// walker.phase_.multiply(term);
// }
// }



template <class Parameters, DistType DIST>
template <linalg::DeviceType DEVICE>
void CtintWalkerBase<Parameters, DIST>::setMFromConfigImpl(DMatrixBuilder<DEVICE, Scalar>& d_matrix_builder) {
void CtintWalkerBase<Parameters, DIST>::setMFromConfigImpl(
DMatrixBuilder<DEVICE, Scalar>& d_matrix_builder) {
mc_log_weight_ = 0.;
phase_.reset();

@@ -416,7 +409,7 @@ void CtintWalkerBase<Parameters, DIST>::setMFromConfigImpl(DMatrixBuilder<DEVICE
phase_.multiply(term);
}
}

} // namespace ctint
} // namespace solver
} // namespace phys