Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
217 changes: 105 additions & 112 deletions Grid/cshift/Cshift_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,73 @@ NAMESPACE_BEGIN(Grid);

extern std::vector<std::pair<int,int> > Cshift_table;
extern deviceVector<std::pair<int,int> > Cshift_table_device;
extern std::vector<int> Cshift_vector;
extern deviceVector<int> Cshift_vector_device;

inline std::pair<int,int> *MapCshiftTable(void)
// Copy Cshift map object (table or vector) to device
template<class vobj>
inline void MapCshiftCopy(std::vector<vobj> &Cshift_obj, deviceVector<vobj> &Cshift_obj_device)
{
// GPU version
uint64_t sz=Cshift_table.size();
if (Cshift_table_device.size()!=sz ) {
Cshift_table_device.resize(sz);
// GPU version only
uint64_t sz=Cshift_obj.size();
if (Cshift_obj_device.size()!=sz ) {
Cshift_obj_device.resize(sz);
}
acceleratorCopyToDevice((void *)&Cshift_table[0],
(void *)&Cshift_table_device[0],
sizeof(Cshift_table[0])*sz);
acceleratorCopyToDevice((void *)&Cshift_obj[0],
(void *)&Cshift_obj_device[0],
sizeof(Cshift_obj[0])*sz);

return &Cshift_table_device[0];
// CPU version use identify map
}

// Copy Cshift map object (table or vector) to device and return pointer to device copy
template<class vobj>
inline vobj *MapCshift(std::vector<vobj> &Cshift_obj, deviceVector<vobj> &Cshift_obj_device)
{
MapCshiftCopy<vobj>(Cshift_obj, Cshift_obj_device);

return &Cshift_obj_device[0];
}

// Calculate Cshift_vector
template<class vobj>
void CalculateCshiftVector(Lattice<vobj> &ret, const Lattice<vobj> &rhs, int dimension, int cbmask)
{
GridBase *grid = rhs.Grid();

if ( !grid->CheckerBoarded(dimension) ) {
cbmask=0x3;
}

int e1=grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=grid->_slice_block[dimension];
int stride = grid->_slice_stride[dimension];

if (Cshift_vector.size() < e1*e2) Cshift_vector.resize(e1*e2); // Let it grow to biggest

int ent = 0;
if(cbmask == 0x3 ){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
Cshift_vector[ent++] = o;
}
}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<ret.Grid()->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
Cshift_vector[ent++] = o;
}
}
}
}

if (ent < Cshift_vector.size()) Cshift_vector.resize(ent); // trim vector to actual size (relevant for checkerboarded dimensions)
}


///////////////////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
///////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -89,7 +141,7 @@ Gather_plane_simple (const Lattice<vobj> &rhs,deviceVector<vobj> &buffer,int dim
}
{
auto buffer_p = & buffer[0];
auto table = MapCshiftTable();
auto table = MapCshift<std::pair<int,int> >(Cshift_table, Cshift_table_device);
autoView(rhs_v , rhs, AcceleratorRead);
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(buffer_p[table[i].first],coalescedRead(rhs_v[table[i].second]));
Expand Down Expand Up @@ -201,7 +253,7 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,deviceVector<

{
auto buffer_p = & buffer[0];
auto table = MapCshiftTable();
auto table = MapCshift<std::pair<int,int> >(Cshift_table, Cshift_table_device);
autoView( rhs_v, rhs, AcceleratorWrite);
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(rhs_v[table[i].first],coalescedRead(buffer_p[table[i].second]));
Expand Down Expand Up @@ -263,94 +315,32 @@ template<class vobj> void Scatter_plane_merge(Lattice<vobj> &rhs,ExtractPointerA

template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
{
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
cbmask=0x3;
}

int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane

int e1=rhs.Grid()->_slice_nblock[dimension]; // clearly loop invariant for icpc
int e2=rhs.Grid()->_slice_block[dimension];
int stride = rhs.Grid()->_slice_stride[dimension];

if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest

int ent=0;

if(cbmask == 0x3 ){
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride+b;
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o);
if ( ocb&cbmask ) {
Cshift_table[ent++] = std::pair<int,int>(lo+o,ro+o);
}
}
}
}
auto table = &Cshift_vector_device[0];

{
auto table = MapCshiftTable();
autoView(rhs_v , rhs, AcceleratorRead);
autoView(lhs_v , lhs, AcceleratorWrite);
accelerator_for(i,ent,vobj::Nsimd(),{
coalescedWrite(lhs_v[table[i].first],coalescedRead(rhs_v[table[i].second]));
});
}
autoView(rhs_v , rhs, AcceleratorRead);
autoView(lhs_v , lhs, AcceleratorWrite);
accelerator_for(i,Cshift_vector.size(),vobj::Nsimd(),{
coalescedWrite(lhs_v[table[i]+lo],coalescedRead(rhs_v[table[i]+ro]));
});
}

template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
{
int rd = rhs.Grid()->_rdimensions[dimension];

if ( !rhs.Grid()->CheckerBoarded(dimension) ) {
cbmask=0x3;
}

int ro = rplane*rhs.Grid()->_ostride[dimension]; // base offset for start of plane
int lo = lplane*lhs.Grid()->_ostride[dimension]; // base offset for start of plane

int e1=rhs.Grid()->_slice_nblock[dimension];
int e2=rhs.Grid()->_slice_block [dimension];
int stride = rhs.Grid()->_slice_stride[dimension];

if(Cshift_table.size()<e1*e2) Cshift_table.resize(e1*e2); // Let it grow to biggest

int ent=0;

if ( cbmask == 0x3 ) {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
} else {
for(int n=0;n<e1;n++){
for(int b=0;b<e2;b++){
int o =n*stride;
int ocb=1<<lhs.Grid()->CheckerBoardFromOindex(o+b);
if ( ocb&cbmask ) Cshift_table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
}}
}

{
auto table = MapCshiftTable();
autoView( rhs_v, rhs, AcceleratorRead);
autoView( lhs_v, lhs, AcceleratorWrite);
accelerator_for(i,ent,1,{
permute(lhs_v[table[i].first],rhs_v[table[i].second],permute_type);
auto table = &Cshift_vector_device[0];
autoView( rhs_v, rhs, AcceleratorRead);
autoView( lhs_v, lhs, AcceleratorWrite);
accelerator_for(i,Cshift_vector.size(),1,{
permute(lhs_v[table[i]+lo],rhs_v[table[i]+ro],permute_type);
});
}
}

//////////////////////////////////////////////////////
Expand Down Expand Up @@ -383,51 +373,54 @@ template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &r
// Map to always positive shift modulo global full dimension.
shift = (shift+fd)%fd;

int cb= (cbmask==0x2)? Odd : Even;
int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);

// the permute type
ret.Checkerboard() = grid->CheckerBoardDestination(rhs.Checkerboard(),shift,dimension);
int permute_dim =grid->PermuteDim(dimension);
int permute_type=grid->PermuteType(dimension);
int permute_type_dist;

for(int x=0;x<rd;x++){
// wrap is whether sshift > rd.
// num is sshift mod rd.
//
// shift 7
//
// XoXo YcYc
// oXoX cYcY
// XoXo YcYc
// oXoX cYcY
//
// sshift --
//
// XX YY ; 3
// XX YY ; 0
// XX YY ; 3
// XX YY ; 0
//
int wrap = sshift/rd; wrap=wrap % ly;
int num = sshift%rd;

// Calculate Cshift_vector - it's the same for all slices
CalculateCshiftVector<vobj>(ret, rhs, dimension, cbmask);
// Copy it to the device
MapCshiftCopy<int>(Cshift_vector, Cshift_vector_device);

// int o = 0;
int bo = x * grid->_ostride[dimension];
int cb= (cbmask==0x2)? Odd : Even;
for(int x=0;x<rd;x++){

int sshift = grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
int sx = (x+sshift)%rd;

// wrap is whether sshift > rd.
// num is sshift mod rd.
//
// shift 7
//
// XoXo YcYc
// oXoX cYcY
// XoXo YcYc
// oXoX cYcY
//
// sshift --
//
// XX YY ; 3
// XX YY ; 0
// XX YY ; 3
// XX YY ; 0
//
int permute_slice=0;
if(permute_dim){
int wrap = sshift/rd; wrap=wrap % ly;
int num = sshift%rd;

if ( x< rd-num ) permute_slice=wrap;
else permute_slice = (wrap+1)%ly;

if ( (ly>2) && (permute_slice) ) {
assert(permute_type & RotateBit);
assert(permute_type & RotateBit);
permute_type_dist = permute_type|permute_slice;
} else {
permute_type_dist = permute_type;
permute_type_dist = permute_type;
}
}

Expand Down
6 changes: 6 additions & 0 deletions Grid/cshift/Cshift_mpi.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r

int cb= (cbmask==0x2)? Odd : Even;
int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);

// Calculate Cshift_vector - it's the same for all slices
CalculateCshiftVector<vobj>(ret, rhs, dimension, cbmask);
// Copy it to the device
MapCshiftCopy<int>(Cshift_vector, Cshift_vector_device);

RealD tcopy=0.0;
RealD tgather=0.0;
RealD tscatter=0.0;
Expand Down
2 changes: 2 additions & 0 deletions Grid/cshift/Cshift_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
NAMESPACE_BEGIN(Grid);
std::vector<std::pair<int,int> > Cshift_table;
deviceVector<std::pair<int,int> > Cshift_table_device;
std::vector<int> Cshift_vector;
deviceVector<int> Cshift_vector_device;
NAMESPACE_END(Grid);
9 changes: 6 additions & 3 deletions Grid/qcd/smearing/WilsonFlow.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,14 @@ std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(con
}

template <class Gimpl>
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int meas_interval){
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
});
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (cloverleaf) : " << step << " " << t << " " << energyDensityCloverleaf(t,U) << std::endl;
});
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
});
}
Expand Down
10 changes: 8 additions & 2 deletions HMC/FTHMC2p1f.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,20 @@ directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
#endif

using namespace Grid;

int main(int argc, char **argv)
{
#if Nc != 3
#warning FTHMC2p1f will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12);

Grid_init(&argc, &argv);
Expand Down Expand Up @@ -220,7 +227,6 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing

Grid_finalize();
#endif
} // main



9 changes: 9 additions & 0 deletions HMC/FTHMC2p1f_3GeV.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,22 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */

#include <Grid/Grid.h>

#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
#endif

using namespace Grid;

int main(int argc, char **argv)
{
#if Nc != 3
#warning FTHMC2p1f_3GeV will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12);

Grid_init(&argc, &argv);
Expand Down Expand Up @@ -220,6 +228,7 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing

Grid_finalize();
#endif
} // main


Expand Down
Loading