From e0a51be8907b463fa8d776ce76242f78b825e227 Mon Sep 17 00:00:00 2001 From: Matias Berdakin Date: Thu, 5 Feb 2026 09:49:36 -0300 Subject: [PATCH 1/2] MPI implementation of timeprop (#1390) Co-authored-by: Franco Bonafe Co-authored-by: Tammo van der Heide Co-authored-by: Franco Bonafe --- AUTHORS.rst | 2 + doc/dftb+/manual/dftbp.tex | 2 +- src/dftbp/api/mm/mmapi.F90 | 2 +- src/dftbp/dftb/populations.F90 | 45 +- src/dftbp/dftbplus/initprogram.F90 | 18 +- src/dftbp/dftbplus/mainapi.F90 | 7 +- src/dftbp/dftbplus/parser.F90 | 7 - src/dftbp/extlibs/scalapackfx.F90 | 4 +- src/dftbp/timedep/dynamicsrestart.F90 | 225 ++++- src/dftbp/timedep/timeprop.F90 | 939 ++++++++++++++---- test/app/dftb+/tests | 41 +- test/app/dftb+/timeprop/C4H6_rs/dftb_in.hsd | 6 + .../timeprop/C4H6_rs_Singlet/dftb_in.hsd | 5 + .../timeprop/C4H6_rs_Triplet/dftb_in.hsd | 4 + test/app/dftb+/timeprop/C60_kick/dftb_in.hsd | 4 + .../dftb+/timeprop/C60_triplet/dftb_in.hsd | 6 + .../timeprop/CH3_dftbu_singlet/dftb_in.hsd | 4 + .../timeprop/GNR_periodic_kick/dftb_in.hsd | 1 + .../timeprop/GNR_periodic_laser/dftb_in.hsd | 1 + .../dftb+/timeprop/H2_exc_atoms/dftb_in.hsd | 5 + .../app/dftb+/timeprop/H2_singlet/dftb_in.hsd | 5 + .../app/dftb+/timeprop/H2_triplet/dftb_in.hsd | 6 + test/app/dftb+/timeprop/NO_onsite/dftb_in.hsd | 5 + test/app/dftb+/timeprop/NO_pSIC/dftb_in.hsd | 4 + .../dftb+/timeprop/PDI_charged/dftb_in.hsd | 5 + .../timeprop/benzene_circpol/dftb_in.hsd | 1 + .../timeprop/benzene_gaussian/dftb_in.hsd | 1 + .../timeprop/benzene_kick_bndpop/dftb_in.hsd | 1 + .../dftb+/timeprop/benzene_laser/dftb_in.hsd | 1 + .../timeprop/benzene_laser_gfn2/dftb_in.hsd | 12 +- .../app/dftb+/timeprop/benzene_rs/dftb_in.hsd | 1 + .../dftb+/timeprop/benzene_sin2/dftb_in.hsd | 1 + 32 files changed, 1134 insertions(+), 237 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index fd7f1a877d..bf59e1e0b2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -25,6 +25,8 @@ The DFTB+ development is being lead and coordinated by: The 
following people (in alphabetic order by their family names) have contributed to DFTB+ : +* Matías Berdakin (Universidad Nacional de Córdoba, Argentina) + * Franco Bonafé (Max Planck Institute for the Structure and Dynamics of Matter, Germany) diff --git a/doc/dftb+/manual/dftbp.tex b/doc/dftb+/manual/dftbp.tex index a382f38457..2215c6ce4e 100644 --- a/doc/dftb+/manual/dftbp.tex +++ b/doc/dftb+/manual/dftbp.tex @@ -5913,7 +5913,7 @@ \section{ElectronDynamics} Calculate the real-time propagation of both the electronic state of the system and the nuclei within the Ehfenfest approximation in the presence of an optional external time-dependent perturbation, as described in Ref.~\cite{realtime1}. -The real-time propagation is available with DFTB as well as xTB Hamiltonians. +The real-time propagation is available with DFTB as well as xTB Hamiltonians, and it can be run in parallel using OpenMP as well as MPI. However, calculations for periodic systems, hybrid functionals, DFTB+U, onsite corrections and ion dynamics are not implemented with MPI yet (only OpenMP parallelization can be used in those cases). 
\begin{ptable} \kw{Steps} &i & & - & \\ diff --git a/src/dftbp/api/mm/mmapi.F90 b/src/dftbp/api/mm/mmapi.F90 index 95786716d4..a1d6ffa66a 100644 --- a/src/dftbp/api/mm/mmapi.F90 +++ b/src/dftbp/api/mm/mmapi.F90 @@ -1243,7 +1243,7 @@ subroutine TDftbPlus_finalizeTimeProp(this) !> Instance class(TDftbPlus), intent(inout) :: this - call finalizeTimeProp(this%main) + call finalizeTimeProp(this%env, this%main) end subroutine TDftbPlus_finalizeTimeProp diff --git a/src/dftbp/dftb/populations.F90 b/src/dftbp/dftb/populations.F90 index 4f3c020b42..3f7e39a608 100644 --- a/src/dftbp/dftb/populations.F90 +++ b/src/dftbp/dftb/populations.F90 @@ -40,7 +40,8 @@ module dftbp_dftb_populations #:if WITH_SCALAPACK public :: denseMullikenRealBlacs public :: denseSubtractDensityOfAtomsRealNonperiodicBlacs,& - & denseSubtractDensityOfAtomsRealPeriodicBlacs + & denseSubtractDensityOfAtomsRealPeriodicBlacs,& + & denseSubtractDensityOfAtomsCmplxNonperiodicBlacs #:endif @@ -1046,6 +1047,48 @@ subroutine denseSubtractDensityOfAtomsRealNonperiodicBlacs(env, parallelKS, q0, end subroutine denseSubtractDensityOfAtomsRealNonperiodicBlacs + !> Subtracts superposition of atomic densities from distributed, dense, square, complex-valued + !! density matrix. + !! + !! For spin-polarized calculations, q0 is distributed equally to alpha and beta density matrices. 
+ subroutine denseSubtractDensityOfAtomsCmplxNonperiodicBlacs(env, parallelKS, q0, denseDesc, rho) + + !> Environment settings + type(TEnvironment), intent(in) :: env + + !> The k-points and spins to process + type(TParallelKS), intent(in) :: parallelKS + + !> Reference atom populations + real(dp), intent(in) :: q0(:,:,:) + + !> Dense matrix descriptor + type(TDenseDescr), intent(in) :: denseDesc + + !> Spin polarized (lower triangular) matrix + complex(dp), intent(inout) :: rho(:,:,:) + + integer :: nAtom, iKS, iAt, iOrbStart, nOrb, iOrb + complex(dp) :: tmp(size(q0, dim=1), size(q0, dim=1)) + + nAtom = size(q0, dim=2) + + do iKS = 1, parallelKS%nLocalKS + do iAt = 1, nAtom + iOrbStart = denseDesc%iAtomStart(iAt) + nOrb = denseDesc%iAtomStart(iAt + 1) - iOrbStart + tmp(:,:) = (0.0_dp, 0.0_dp) + do iOrb = 1, nOrb + tmp(iOrb, iOrb) = -q0(iOrb, iAt, 1) + end do + call scalafx_addl2g(env%blacs%orbitalGrid, tmp(1:nOrb, 1:nOrb), denseDesc%blacsOrbSqr,& + & iOrbStart, iOrbStart, rho(:,:,iKS)) + end do + end do + + end subroutine denseSubtractDensityOfAtomsCmplxNonperiodicBlacs + + !> Subtracts superposition of atomic densities from distributed, dense, square, real-valued !! density matrix. !! diff --git a/src/dftbp/dftbplus/initprogram.F90 b/src/dftbp/dftbplus/initprogram.F90 index 15d280ccef..a18f55fe18 100644 --- a/src/dftbp/dftbplus/initprogram.F90 +++ b/src/dftbp/dftbplus/initprogram.F90 @@ -4074,12 +4074,20 @@ subroutine initProgramVariables(this, input, env) call error("Electron dynamics is not compatibile with this spinor Hamiltonian") end if - if (withMpi) then - call error("Electron dynamics does not work with MPI yet") + if (input%ctrl%elecDynInp%tIons .and. withMPI) then + call error("Ion dynamics time propagation does not work with MPI yet") + end if + + if (.not. this%tRealHS .and. withMpi) then + call error("Electron dynamics of periodic systems does not work with MPI yet") + end if + + if (.not. this%tRealHS .and. 
withMpi) then + call error("Electron dynamics of periodic systems does not work with MPI yet") end if - if (this%tFixEf) then - call error("Electron dynamics does not work with fixed Fermi levels yet") + if ((allocated(this%dftbU) .or. allocated(this%onSiteElements)) .and. withMpi) then + call error("Electron dynamics with DFTB+U or onsite corrections not implemented with MPI yet") end if if (this%tSpinSharedEf) then @@ -4118,7 +4126,7 @@ subroutine initProgramVariables(this, input, env) & this%mass, this%nAtom, this%atomEigVal, this%dispersion, this%nonSccDeriv,& & this%tPeriodic, this%parallelKS, this%tRealHS, this%kPoint, this%kWeight,& & this%isHybridXc, this%scc, this%tblite, this%eFieldScaling, this%hamiltonianType,& - & errStatus) + & this%denseDesc, errStatus) if (errStatus%hasError()) call error(errStatus%message) end if diff --git a/src/dftbp/dftbplus/mainapi.F90 b/src/dftbp/dftbplus/mainapi.F90 index b6cee83067..2006198858 100644 --- a/src/dftbp/dftbplus/mainapi.F90 +++ b/src/dftbp/dftbplus/mainapi.F90 @@ -753,13 +753,16 @@ end subroutine initializeTimeProp !> Finalizes the dynamics (releases memory, closes eventual open files) - subroutine finalizeTimeProp(main) + subroutine finalizeTimeProp(env, main) + + !> dftb+ environment + type(TEnvironment), intent(inout) :: env !> Instance type(TDftbPlusMain), intent(inout) :: main if (allocated(main%electronDynamics)) then - call finalizeDynamics(main%electronDynamics) + call finalizeDynamics(main%electronDynamics, env) end if end subroutine finalizeTimeProp diff --git a/src/dftbp/dftbplus/parser.F90 b/src/dftbp/dftbplus/parser.F90 index 29f3b9af3d..883465cf1c 100644 --- a/src/dftbp/dftbplus/parser.F90 +++ b/src/dftbp/dftbplus/parser.F90 @@ -5807,13 +5807,6 @@ subroutine readElecDynamics(node, input, geom, masses) real (dp) :: defPpRange(2) logical :: defaultWrite - #:if WITH_MPI - if (associated(node)) then - call detailedError(node, 'This DFTB+ binary has been compiled with MPI settings and & - & electron 
dynamics are not currently available for distributed parallel calculations.') - end if - #:endif - call getChildValue(node, "Steps", input%steps) call getChildValue(node, "TimeStep", input%dt, modifier=modifier, child=child) call convertUnitHsd(char(modifier), timeUnits, child, input%dt) diff --git a/src/dftbp/extlibs/scalapackfx.F90 b/src/dftbp/extlibs/scalapackfx.F90 index fa6906e0ea..d347c7ae6c 100644 --- a/src/dftbp/extlibs/scalapackfx.F90 +++ b/src/dftbp/extlibs/scalapackfx.F90 @@ -12,9 +12,9 @@ module dftbp_extlibs_scalapackfx #:if WITH_SCALAPACK use libscalapackfx_module, only : blacsfx_gemr2d, blacsfx_gsum, blacsgrid, blocklist, CSRC_,& & DLEN_, linecomm, M_, MB_, N_, NB_, pblasfx_pgemm, pblasfx_phemm, pblasfx_psymm,& - & pblasfx_psymv, pblasfx_ptran, pblasfx_ptranc, RSRC_, scalafx_addg2l, scalafx_addl2g,& + & pblasfx_psymv, pblasfx_ptran, pblasfx_ptranc, pblasfx_ptranu, RSRC_, scalafx_addg2l, scalafx_addl2g,& & scalafx_cpg2l, scalafx_cpl2g, scalafx_getdescriptor, scalafx_getlocalshape,& - & scalafx_indxl2g, scalafx_infog2l, scalafx_islocal, scalafx_pgetrf, scalafx_phegv,& + & scalafx_indxl2g, scalafx_infog2l, scalafx_islocal, scalafx_pgetri, scalafx_pgetrf, scalafx_phegv,& & scalafx_phegvd, scalafx_phegvr, scalafx_pposv, scalafx_ppotrf, scalafx_ppotri,& & scalafx_psygv, scalafx_psygvd, scalafx_psygvr, size #:endif diff --git a/src/dftbp/timedep/dynamicsrestart.F90 b/src/dftbp/timedep/dynamicsrestart.F90 index 67052bb051..e75f52f2fc 100644 --- a/src/dftbp/timedep/dynamicsrestart.F90 +++ b/src/dftbp/timedep/dynamicsrestart.F90 @@ -11,8 +11,14 @@ !> Routines for the restart of the time propagation of the density matrix/atoms module dftbp_timedep_dynamicsrestart use dftbp_common_accuracy, only : dp + use dftbp_common_environment, only : TEnvironment use dftbp_common_file, only : closeFile, openFile, TFileDescr, TOpenOptions use dftbp_common_status, only : TStatus +#:if WITH_SCALAPACK + use dftbp_extlibs_scalapackfx, only : linecomm, DLEN_, M_, N_, 
scalafx_getdescriptor + use dftbp_type_densedescr, only: TDenseDescr + use dftbp_type_commontypes, only : TParallelKS +#:endif implicit none !> Version number for restart format, please increment if you change the file format (and consider @@ -21,11 +27,14 @@ module dftbp_timedep_dynamicsrestart private public :: writeRestartFile, readRestartFile - +#:if WITH_SCALAPACK + public :: writeRestartFileBlacs, readRestartFileBlacs +#:endif + contains !> Write to a restart file. - subroutine writeRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsciiFile, errStatus) + subroutine writeRestartFile(rho, rhoOld, coord, veloc, time, dt, fileNamePrefix, isAsciiFile, errStatus) !> Density matrix complex(dp), intent(in) :: rho(:,:,:) @@ -45,8 +54,8 @@ subroutine writeRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsc !> Time step being used (in atomic units) real(dp), intent(in) :: dt - !> Name of the dump file - character(len=*), intent(in) :: fileName + !> Name prefix of the dump file (extension appended within the routine automatically) + character(len=*), intent(in) :: fileNamePrefix !> Should restart data be written as ascii (cross platform, but potentially lower !! reproducibility) or binary files @@ -60,19 +69,19 @@ subroutine writeRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsc character(len=120) :: error_string if (isAsciiFile) then - call openFile(fd, trim(fileName) // '.dat', mode="w", iostat=iErr) + call openFile(fd, trim(fileNamePrefix) // '.dat', mode="w", iostat=iErr) else ! 
Set to stream explicitely, as it was written as stream from the beginning - call openFile(fd, trim(fileName) // '.bin',& + call openFile(fd, trim(fileNamePrefix) // '.bin',& & options=TOpenOptions(form='unformatted', access='stream', action='write'), iostat=iErr) end if if (iErr /= 0) then if (isAsciiFile) then - write(error_string, "(A,A,A)") "Failure to open external restart file ",trim(fileName),& + write(error_string, "(A,A,A)") "Failure to open external restart file ",trim(fileNamePrefix),& & ".dat for writing" else - write(error_string, "(A,A,A)") "Failure to open external restart file ",trim(fileName),& + write(error_string, "(A,A,A)") "Failure to open external restart file ",trim(fileNamePrefix),& & ".bin for writing" end if @:RAISE_ERROR(errStatus, -1, error_string) @@ -117,7 +126,7 @@ end subroutine writeRestartFile !> Read a restart file containing density matrix, overlap, coordinates and time step - subroutine readRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsciiFile, errStatus) + subroutine readRestartFile(rho, rhoOld, coord, veloc, time, dt, fileNamePrefix, isAsciiFile, errStatus) !> Density Matrix complex(dp), intent(out) :: rho(:,:,:) @@ -134,8 +143,8 @@ subroutine readRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsci !> Time step being currently used (in atomic units) for checking compatibility real(dp), intent(in) :: dt - !> Name of the file to open - character(*), intent(in) :: fileName + !> Name prefix of the dump file (extension appended within the routine automatically) + character(*), intent(in) :: fileNamePrefix !> Atomic velocities real(dp), intent(out) :: veloc(:,:) @@ -154,33 +163,35 @@ subroutine readRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsci character(len=120) :: error_string if (isAsciiFile) then - inquire(file=trim(fileName)//'.dat', exist=isExisting) + inquire(file=trim(fileNamePrefix)//'.dat', exist=isExisting) if (.not. 
isExisting) then - error_string = "TD restart file " // trim(fileName)//'.dat' // " is missing" + error_string = "TD restart file " // trim(fileNamePrefix)//'.dat' // " is missing" @:RAISE_ERROR(errStatus, -1, error_string) end if else - inquire(file=trim(fileName)//'.bin', exist=isExisting) + inquire(file=trim(fileNamePrefix)//'.bin', exist=isExisting) if (.not. isExisting) then - error_string = "TD restart file " // trim(fileName)//'.bin' // " is missing" + error_string = "TD restart file " // trim(fileNamePrefix)//'.bin' // " is missing" @:RAISE_ERROR(errStatus, -1, error_string) end if end if if (isAsciiFile) then - call openFile(fd, trim(fileName)//'.dat', mode="r", iostat=iErr) + call openFile(fd, trim(fileNamePrefix)//'.dat', mode="r", iostat=iErr) else ! Set to stream explicitely, as it was written as stream from the beginning - call openFile(fd, file=trim(fileName)//'.bin',& + call openFile(fd, file=trim(fileNamePrefix)//'.bin',& & options=TOpenOptions(form='unformatted', access='stream', action='read',& & position="rewind"), iostat=iErr) end if if (iErr /= 0) then if (isAsciiFile) then - write(error_string, "(A,A,A)") "Failure to open external tddump file",trim(fileName), ".dat" + write(error_string, "(A,A,A)") "Failure to open external tddump file",& + & trim(fileNamePrefix), ".dat" else - write(error_string, "(A,A,A)") "Failure to open external tddump file",trim(fileName), ".bin" + write(error_string, "(A,A,A)") "Failure to open external tddump file",& + & trim(fileNamePrefix), ".bin" end if @:RAISE_ERROR(errStatus, -1, error_string) end if @@ -261,4 +272,180 @@ subroutine readRestartFile(rho, rhoOld, coord, veloc, time, dt, fileName, isAsci end subroutine readRestartFile +#:if WITH_SCALAPACK + !> Write to a restart file the DM in distributed format + subroutine writeRestartFileBlacs(rho, rhoOld, coord, veloc, time, dt, fileNamePrefix, env, & + & denseDesc, parallelKS, errStatus) + + !> Density matrix + complex(dp), intent(in) :: rho(:,:,:) + + !> 
Density matrix at previous time step + complex(dp), intent(in) :: rhoOld(:,:,:) + + !> Atomic coordinates + real(dp), intent(in) :: coord(:,:) + + !> Atomic velocities + real(dp), intent(in) :: veloc(:,:) + + !> Simulation time (in atomic units) + real(dp), intent(in) :: time + + !> Time step being used (in atomic units) + real(dp), intent(in) :: dt + + !> Name prefix of the dump file (extension appended within the routine automatically) + character(len=*), intent(in) :: fileNamePrefix + + !> Environment settings + type(TEnvironment), intent(inout) :: env + + !> Dense matrix descriptor + type(TDenseDescr), intent(in) :: denseDesc + + !> Container for parallel distribution pattern of k-points and spins + type(TParallelKS), allocatable :: parallelKS + + !> Operation status + type(TStatus), intent(out) :: errStatus + + type(TFileDescr) :: fd + integer :: iErr, nOrb, nSpin, icol, iKS, irow, icoll + type(linecomm) :: collector + complex(dp), allocatable :: localRhoCol(:) + + @:ASSERT(env%mpi%nGroup == 1) + nOrb = denseDesc%fullSize + nSpin = parallelKS%nLocalKS + if (env%mpi%tGlobalLead) then + call openFile(fd, trim(fileNamePrefix) // '.bin',& + & options=TOpenOptions(form='unformatted', access='stream', action='write')) + write(fd%unit) iDumpFormat + write(fd%unit) nOrb, nSpin, size(coord, dim=2), time, dt + write(fd%unit) coord, veloc + end if + + nOrb = denseDesc%fullSize + allocate(localRhoCol(nOrb)) + + call collector%init(env%blacs%orbitalGrid, denseDesc%blacsOrbSqr, "c") + do iKS = 1, parallelKS%nLocalKS + do icol = 1, nOrb + if (env%mpi%tGlobalLead) then + call collector%getline_lead(env%blacs%orbitalGrid, icol, rho(:,:,iKS), localRhoCol) + write(fd%unit) localRhoCol + else + call collector%getline_follow(env%blacs%orbitalGrid, icol, rho(:,:,iKS)) + end if + end do + end do + + do iKS = 1, parallelKS%nLocalKS + do icol = 1, nOrb + if (env%mpi%tGlobalLead) then + call collector%getline_lead(env%blacs%orbitalGrid, icol, rhoOld(:,:,iKS), localRhoCol) + 
write(fd%unit) localRhoCol + else + call collector%getline_follow(env%blacs%orbitalGrid, icol, rhoOld(:,:,iKS)) + end if + end do + end do + + if (env%mpi%tGlobalLead) then + call closeFile(fd) + end if + + end subroutine writeRestartFileBlacs + + + !> Read from a restart file the DM in Blacs format + subroutine readRestartFileBlacs(rho, rhoOld, coord, veloc, time, dt, fileNamePrefix, env, & + & denseDesc, parallelKS, errStatus) + + !> Density Matrix + complex(dp), intent(out) :: rho(:,:,:) + + !> Previous density Matrix + complex(dp), intent(out) :: rhoOld(:,:,:) + + !> Atomic coordinates + real(dp), intent(out) :: coord(:,:) + + !> Previous simulation elapsed time until restart file writing + real(dp), intent(out) :: time + + !> Time step being currently used (in atomic units) for checking compatibility + real(dp), intent(in) :: dt + + !> Name prefix of the dump file (extension appended within the routine automatically) + character(*), intent(in) :: fileNamePrefix + + !> Atomic velocities + real(dp), intent(out) :: veloc(:,:) + + !> Environment settings + type(TEnvironment), intent(inout) :: env + + !> Dense matrix descriptor + type(TDenseDescr), intent(in) :: denseDesc + + !> Container for parallel distribution pattern of k-points and spins + type(TParallelKS), allocatable :: parallelKS + + !> Operation status + type(TStatus), intent(out) :: errStatus + + type(TFileDescr) :: fd + integer :: iErr, version, nOrb, nSpin, nAtom, icol, iKS, irow, icoll + character(len=120) :: error_string + real(dp) :: deltaT + type(linecomm) :: distributor + complex(dp), allocatable :: localRhoCol(:) + + ! The following reading routine is very fragile and still missing checks + ! on nOrb, nSpin, nAtom and so on. This should be improved in the future. 
+ if (env%mpi%tGlobalLead) then + call openFile(fd, file=trim(fileNamePrefix)//'.bin',& + & options=TOpenOptions(form='unformatted', access='stream', action='read',& + & position="rewind")) + + read(fd%unit) version + read(fd%unit) nOrb, nSpin, nAtom, time, deltaT + read(fd%unit) coord, veloc + end if + + nOrb = denseDesc%fullSize + allocate(localRhoCol(nOrb)) + + call distributor%init(env%blacs%orbitalGrid, denseDesc%blacsOrbSqr, "c") + do iKS = 1, parallelKS%nLocalKS + do icol = 1, nOrb + if (env%mpi%tGlobalLead) then + read(fd%unit) localRhoCol + call distributor%setline_lead(env%blacs%orbitalGrid, icol, localRhoCol(:), rho(:,:,iKS)) + else + call distributor%setline_follow(env%blacs%orbitalGrid, icol, rho(:,:,iKS)) + end if + end do + end do + + do iKS = 1, parallelKS%nLocalKS + do icol = 1, nOrb + if (env%mpi%tGlobalLead) then + read(fd%unit) localRhoCol + call distributor%setline_lead(env%blacs%orbitalGrid, icol, localRhoCol(:), rhoOld(:,:,iKS)) + else + call distributor%setline_follow(env%blacs%orbitalGrid, icol, rhoOld(:,:,iKS)) + end if + end do + end do + + if (env%mpi%tGlobalLead) then + call closeFile(fd) + end if + + end subroutine readRestartFileBlacs +#:endif + end module dftbp_timedep_dynamicsrestart diff --git a/src/dftbp/timedep/timeprop.F90 b/src/dftbp/timedep/timeprop.F90 index 073b0a3a2a..25711fb8ba 100644 --- a/src/dftbp/timedep/timeprop.F90 +++ b/src/dftbp/timedep/timeprop.F90 @@ -68,9 +68,22 @@ module dftbp_timedep_timeprop use dftbp_solvation_solvation, only : TSolvation use dftbp_timedep_dynamicsrestart, only : readRestartFile, writeRestartFile use dftbp_type_commontypes, only : TOrbitals, TParallelKS + use dftbp_type_densedescr, only: TDenseDescr use dftbp_type_eleccutoffs, only : TCutoffs use dftbp_type_integral, only : TIntegral use dftbp_type_multipole, only : TMultipole, TMultipole_init +#:if WITH_SCALAPACK + use dftbp_dftb_densitymatrix, only : makeDensityMtxRealBlacs + use dftbp_dftb_populations, only : mulliken, 
denseSubtractDensityOfAtomsCmplxNonperiodicBlacs + use dftbp_dftb_sparse2dense, only : unpackHSRealBlacs, packRhoRealBlacs + use dftbp_extlibs_mpifx, only : MPI_SUM, mpifx_allreduceip, mpifx_bcast + use dftbp_extlibs_scalapackfx, only : pblasfx_psymm, pblasfx_ptran, pblasfx_pgemm, & + & scalafx_pgetri, scalafx_pgetrf, M_, N_, pblasfx_ptranu, DLEN_,& + & scalafx_getdescriptor, scalafx_addl2g + use dftbp_math_matrixops, only : adjointLowerTriangle_BLACS + use dftbp_math_scalafxext, only : psymmatinv, phermatinv + use dftbp_timedep_dynamicsrestart, only : writeRestartFileBlacs, readRestartFileBlacs +#:endif #:if WITH_MBD use dftbp_dftb_dispmbd, only : TDispMbd #:endif @@ -620,6 +633,9 @@ module dftbp_timedep_timeprop !> Number of dynamics steps to perform integer, public :: nSteps + !> Dense matrix descriptor + type(TDenseDescr) :: denseDesc + end type TElecDynamics @@ -698,7 +714,7 @@ module dftbp_timedep_timeprop subroutine TElecDynamics_init(this, inp, species, speciesName, tWriteAutotest, autotestTag,& & randomThermostat, cutoff, mass, nAtom, atomEigVal, dispersion, nonSccDeriv, tPeriodic,& & parallelKS, tRealHS, kPoint, kWeight, isHybridXc, sccCalc, tblite, eFieldScaling,& - & hamiltonianType, errStatus) + & hamiltonianType, denseDesc, errStatus) !> ElecDynamics instance type(TElecDynamics), intent(out) :: this @@ -775,6 +791,9 @@ subroutine TElecDynamics_init(this, inp, species, speciesName, tWriteAutotest, a !> Type of Hamiltonian used integer, intent(in) :: hamiltonianType + !> Dense matrix descriptor + type(TDenseDescr), intent(in) :: denseDesc + !> Error status type(TStatus), intent(inout) :: errStatus @@ -815,6 +834,7 @@ subroutine TElecDynamics_init(this, inp, species, speciesName, tWriteAutotest, a if (allocated(tblite)) then this%tblite = tblite end if + this%denseDesc = denseDesc if (inp%envType /= envTypes%constant) then this%time0 = inp%time0 @@ -880,6 +900,16 @@ subroutine TElecDynamics_init(this, inp, species, speciesName, tWriteAutotest, a 
this%species0 = species this%tPeriodic = tPeriodic this%isHybridXc = isHybridXc + #:if WITH_MPI + if (isHybridXc) then + @:RAISE_ERROR(errStatus, -1, "MPI-parallel HybridXc not implemented for& + & real-time TDDFTB.") + end if + if (allocated(tblite)) then + @:RAISE_ERROR(errStatus, -1, "MPI-parallel real-time TDDFTB not available for xTB& + & Hamiltonian yet.") + end if + #:endif this%tWriteAtomEnergies = inp%tWriteAtomEnergies if (this%tIons) then @@ -1121,6 +1151,13 @@ subroutine runDynamics(this, boundaryCond, eigvecs, H0, speciesAll, q0, referenc tWriteAutotest = this%tWriteAutotest this%iCall = 1 + + #:if WITH_SCALAPACK + if (env%mpi%nGroup /= 1) then + @:RAISE_ERROR(errStatus, -1, "Real-time dynamics parallelized only using 1 MPI group.") + end if + #:endif + if (allocated(this%polDirs)) then if (size(this%polDirs) > 1) then this%initCoord = coord @@ -1360,7 +1397,7 @@ subroutine doDynamics(this, boundaryCond, eigvecsReal, H0, q0, referenceN0, ints & this%occ, this%lastBondPopul, taggedWriter) end if - call finalizeDynamics(this) + call finalizeDynamics(this, env) end subroutine doDynamics @@ -1478,10 +1515,22 @@ subroutine updateH(this, H1, ints, H0, speciesAll, qq, q0, coord, orb, potential real(dp), allocatable :: T2(:,:) integer :: iAtom, iEatom, iSpin, iKS, iK logical :: tImHam + integer :: nLocalCols, nLocalRows + ! 
Multipole expansion type(TMdftb), allocatable :: mdftb - allocate(T2(this%nOrbs,this%nOrbs)) + #:if WITH_SCALAPACK + nLocalRows = size(H1, dim=1) + nLocalCols = size(H1, dim=2) + #:else + nLocalRows = this%nOrbs + nLocalCols = this%nOrbs + #:endif + + if (this%tRealHS) then + allocate(T2(nLocalRows, nLocalCols)) + end if ints%hamiltonian(:,:) = 0.0_dp @@ -1540,6 +1589,18 @@ subroutine updateH(this, H1, ints, H0, speciesAll, qq, q0, coord, orb, potential call qm2ud(qq) end if + #:if WITH_SCALAPACK + do iKS = 1, this%parallelKS%nLocalKS + iK = this%parallelKS%localKS(1, iKS) + iSpin = this%parallelKS%localKS(2, iKS) + if (this%tRealHS) then + call unpackHSRealBlacs(env%blacs, ints%hamiltonian(:,iSpin), neighbourList%iNeighbour,& + & nNeighbourSK, iSparseStart, img2CentCell, this%denseDesc, T2) + H1(:,:,iSpin) = cmplx(T2, kind=dp) + ! TODO: add here the unpacking of H1 for kpoints + end if + end do + #:else do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) @@ -1547,7 +1608,7 @@ subroutine updateH(this, H1, ints, H0, speciesAll, qq, q0, coord, orb, potential call unpackHS(T2, ints%hamiltonian(:,iSpin), neighbourList%iNeighbour, nNeighbourSK,& & iSquare, iSparseStart, img2CentCell) call adjointLowerTriangle(T2) - H1(:,:,iSpin) = cmplx(T2, 0.0_dp, dp) + H1(:,:,iSpin) = cmplx(T2, kind=dp) else call unpackHS(H1(:,:,iKS), ints%hamiltonian(:,iSpin), this%kPoint(:,iK),& & neighbourList%iNeighbour, nNeighbourSK, this%iCellVec, this%cellVec, iSquare,& @@ -1555,13 +1616,26 @@ subroutine updateH(this, H1, ints, H0, speciesAll, qq, q0, coord, orb, potential call adjointLowerTriangle(H1(:,:,iKS)) end if end do + #:endif - ! add hybrid xc-functional contribution + ! add hybrid xc-functional contribution + ! TODO: this is currently disabled because tests C4H6_rs and C4H6_rs_Singlet were failing. 
if (this%isHybridXc) then - #:if WITH_MPI - @:RAISE_ERROR(errStatus, -1, "Timeprop Module: MPI-parallelization not implemented for hybrid& - & xc-functionals.") - #:else + #:if WITH_MPI + deltaRho = rho + if (this%nSpin > 2) then + @:RAISE_ERROR(errStatus, -1, "HybridXc: Not implemented for non-colinear spin.") + end if + call denseSubtractDensityOfAtomsCmplxNonperiodicBlacs(env, this%parallelKS, q0,& + & this%denseDesc, deltaRho) + + do iSpin = 1, this%nSpin + HSqrCplxCam(:,:) = (0.0_dp, 0.0_dp) + call hybridXc%addCamHamiltonianMatrix_cmplx_blacs(env, this%denseDesc, sSqr(:,:,iSpin),& + & deltaRho(:,:,iSpin), HSqrCplxCam) + H1(:,:,iSpin) = H1(:,:,iSpin) + HSqrCplxCam + end do + #:else deltaRho = rho if (this%nSpin > 2) then @:RAISE_ERROR(errStatus, -1, "HybridXc: Not implemented for non-colinear spin.") @@ -1574,18 +1648,21 @@ subroutine updateH(this, H1, ints, H0, speciesAll, qq, q0, coord, orb, potential & deltaRho(:,:, iSpin), HSqrCplxCam) H1(:,:,iSpin) = H1(:,:,iSpin) + HSqrCplxCam end do - #:endif + #:endif end if end subroutine updateH !> Kick the density matrix for spectrum calculations - subroutine kickDM(this, rho, Ssqr, Sinv, iSquare, coord) + subroutine kickDM(this, env, rho, Ssqr, Sinv, iSquare, coord, orb) !> ElecDynamics instance type(TElecDynamics), intent(in) :: this + !> Environment settings + type(TEnvironment), intent(in) :: env + !> Square overlap complex(dp), intent(in) :: Ssqr(:,:,:) @@ -1601,21 +1678,31 @@ subroutine kickDM(this, rho, Ssqr, Sinv, iSquare, coord) !> Index array for start of atomic block in dense matrices integer, intent(in) :: iSquare(:) - complex(dp), allocatable :: T1(:, :, :), T2(:, :), T3(:, :, :), T4(:, :) - integer :: iAt, iStart, iEnd, iKS, iSpin, iOrb - real(dp) :: pkick(this%nSpin) + !> Atomic orbital information + type(TOrbitals), intent(in) :: orb + complex(dp), allocatable :: T1(:, :, :), T2(:, :), T3(:, :, :), T4(:, :), tmp1(:,:), tmp2(:,:) + integer :: iAt, iStart, iEnd, iKS, iSpin, iOrb, iOrbStart, nOrb + 
real(dp) :: pkick(this%nSpin) + integer :: nLocalCols, nLocalRows character(1), parameter :: localDir(3) = ['x', 'y', 'z'] - allocate(T1(this%nOrbs, this%nOrbs, this%parallelKS%nLocalKS)) - allocate(T2(this%nOrbs, this%nOrbs)) - allocate(T3(this%nOrbs, this%nOrbs, this%parallelKS%nLocalKS)) - allocate(T4(this%nOrbs, this%nOrbs)) - - T1(:,:,:) = cmplx(0,0,dp) - T2(:,:) = cmplx(0,0,dp) - T3(:,:,:) = cmplx(0,0,dp) - T4(:,:) = cmplx(0,0,dp) + #:if WITH_SCALAPACK + nLocalRows = size(rho, dim=1) + nLocalCols = size(rho, dim=2) + #:else + nLocalRows = this%nOrbs + nLocalCols = this%nOrbs + #:endif + allocate(T1(nLocalRows, nLocalCols, this%parallelKS%nLocalKS)) + allocate(T2(nLocalRows, nLocalCols)) + allocate(T3(nLocalRows, nLocalCols, this%parallelKS%nLocalKS)) + allocate(T4(nLocalRows, nLocalCols)) + + T1(:,:,:) = cmplx(0, 0, kind=dp) + T2(:,:) = cmplx(0, 0, kind=dp) + T3(:,:,:) = cmplx(0, 0, kind=dp) + T4(:,:) = cmplx(0, 0, kind=dp) pkick(1) = this%field @@ -1628,6 +1715,44 @@ subroutine kickDM(this, rho, Ssqr, Sinv, iSquare, coord) end select end if + #:if WITH_SCALAPACK + allocate(tmp1(orb%mOrb, orb%mOrb)) + allocate(tmp2(orb%mOrb, orb%mOrb)) + + do iKS = 1, this%parallelKS%nLocalKS + iSpin = this%parallelKS%localKS(2, iKS) + do iAt = 1, this%nAtom + iOrbStart = this%denseDesc%iAtomStart(iAt) + nOrb = this%denseDesc%iAtomStart(iAt + 1) - iOrbStart + tmp1(:,:) = 0.0_dp + tmp2(:,:) = 0.0_dp + do iOrb = 1, nOrb + tmp1(iOrb, iOrb) = exp(cmplx(0, -pkick(iSpin) * coord(this%currPolDir, iAt), dp)) + tmp2(iOrb, iOrb) = exp(cmplx(0, pkick(iSpin) * coord(this%currPolDir, iAt), dp)) + end do + call scalafx_addl2g(env%blacs%orbitalGrid, tmp1(1:nOrb, 1:nOrb), this%denseDesc%blacsOrbSqr,& + & iOrbStart, iOrbStart, T1(:,:,iKS)) + call scalafx_addl2g(env%blacs%orbitalGrid, tmp2(1:nOrb, 1:nOrb), this%denseDesc%blacsOrbSqr,& + & iOrbStart, iOrbStart, T3(:,:,iKS)) + end do + end do + + deallocate(tmp1, tmp2) + + do iKS = 1, this%parallelKS%nLocalKS + call pblasfx_pgemm(T1(:,:,iKS), 
this%denseDesc%blacsOrbSqr, rho(:,:,iKS), this%denseDesc%blacsOrbSqr,& + & T2, this%denseDesc%blacsOrbSqr) + call pblasfx_pgemm(T2, this%denseDesc%blacsOrbSqr, Ssqr(:,:,iKS), this%denseDesc%blacsOrbSqr,& + & T4, this%denseDesc%blacsOrbSqr, cmplx(1, 0, dp)) + call pblasfx_pgemm(T4, this%denseDesc%blacsOrbSqr, T3(:,:,iKS), this%denseDesc%blacsOrbSqr,& + & T2, this%denseDesc%blacsOrbSqr) + call pblasfx_pgemm(T2, this%denseDesc%blacsOrbSqr, Sinv(:,:,iKS), this%denseDesc%blacsOrbSqr,& + & rho(:,:,iKS), this%denseDesc%blacsOrbSqr, cmplx(0.5, 0, dp)) + call pblasfx_pgemm(Sinv(:,:,iKS), this%denseDesc%blacsOrbSqr, T2, this%denseDesc%blacsOrbSqr,& + & rho(:,:,iKS), this%denseDesc%blacsOrbSqr, cmplx(0.5, 0, dp), cmplx(1, 0, dp), 'N', 'C') + end do + + #:else do iKS = 1, this%parallelKS%nLocalKS iSpin = this%parallelKS%localKS(2, iKS) do iAt = 1, this%nAtom @@ -1648,17 +1773,21 @@ subroutine kickDM(this, rho, Ssqr, Sinv, iSquare, coord) call gemm(rho(:,:,iKS), Sinv(:,:,iKS), T2, cmplx(0.5, 0, dp), cmplx(1, 0, dp), 'N', 'C') end do + #:endif write(stdout,"(A)")'Density kicked along ' // localDir(this%currPolDir) //'!' end subroutine kickDM !> Creates array for an external TD field - subroutine getTDFunction(this, startTime) + subroutine getTDFunction(this, env, startTime) !> ElecDynamics instance type(TElecDynamics), intent(inout) :: this + !> Environment settings + type(TEnvironment), intent(in) :: env + !> Starting time of the simulation, if relevant real(dp), intent(in) :: startTime @@ -1666,7 +1795,13 @@ subroutine getTDFunction(this, startTime) real(dp) :: tdfun(3) integer :: iStep type(TFileDescr) :: laserDat + logical :: isLead + #:if WITH_MPI + isLead = env%mpi%tGlobalLead + #:else + isLead = .true. 
+ #:endif allocate(this%tdFunction(3, 0:this%nSteps)) this%tdFunction(:,:) = 0.0_dp @@ -1681,54 +1816,53 @@ subroutine getTDFunction(this, startTime) E0 = 0.0_dp !this is to make sure we never sum the current field with that read from file end if - if (this%tEnvFromFile) then - call openFile(laserDat, "laser.dat", mode="r") - else - if (this%tVerboseDyn) call openOutputFile(this, laserDat, 'laser.dat') - end if - - if (.not. this%tEnvFromFile .and. this%tVerboseDyn) then - write(laserDat%unit, "(A)") "# time (fs) | E_x (eV/ang) | E_y (eV/ang) | E_z (eV/ang)" - end if - - do iStep = 0,this%nSteps - time = iStep * this%dt + startTime - - if (this%envType == envTypes%constant) then - envelope = 1.0_dp - else if (this%envType == envTypes%gaussian) then - envelope = exp(-4.0_dp*pi*(time-midPulse)**2 / deltaT**2) - else if (this%envType == envTypes%sin2 .and. time >= this%time0 .and. time <= this%time1) then - envelope = sin(pi*(time-this%time0)/deltaT)**2 - else - envelope = 0.0_dp - end if - + if (isLead) then if (this%tEnvFromFile) then - read(laserDat%unit, *)time, tdfun(1), tdfun(2), tdfun(3) - this%tdFunction(:, iStep) = tdfun * (Bohr__AA / Hartree__eV) + call openFile(laserDat, "laser.dat", mode="r") + do iStep = 0,this%nSteps + read(laserDat%unit, *)time, tdfun(1), tdfun(2), tdfun(3) + this%tdFunction(:, iStep) = tdfun * (Bohr__AA / Hartree__eV) + end do else - this%tdFunction(:, iStep) = E0 * envelope * aimag(exp(imag*(time*angFreq + this%phase))& - & * this%fieldDir) - if (this%tVerboseDyn) then - write(laserDat%unit, "(5F15.8)") time * au__fs,& - & this%tdFunction(:, iStep) * (Hartree__eV / Bohr__AA) + if (this%tVerboseDyn) then + call openOutputFile(this, env, laserDat, 'laser.dat') + write(laserDat%unit, "(A)") "# time (fs) | E_x (eV/ang) | E_y (eV/ang) | E_z (eV/ang)" end if end if - end do + do iStep = 0,this%nSteps + time = iStep * this%dt + startTime - call closeFile(laserDat) + if (this%envType == envTypes%constant) then + envelope = 1.0_dp + else if 
(this%envType == envTypes%gaussian) then + envelope = exp(-4.0_dp*pi*(time-midPulse)**2 / deltaT**2) + else if (this%envType == envTypes%sin2 .and. time >= this%time0 .and. time <= this%time1) then + envelope = sin(pi*(time-this%time0)/deltaT)**2 + else + envelope = 0.0_dp + end if + + this%tdFunction(:, iStep) = E0 * envelope * aimag(exp(imag*(time*angFreq + this%phase))& + & * this%fieldDir) + write(laserDat%unit, "(5F15.8)") time * au__fs,& + & this%tdFunction(:, iStep) * (Hartree__eV / Bohr__AA) + end do + end if + #:if WITH_MPI + call mpifx_bcast(env%mpi%globalComm, this%tdFunction) + #:endif end subroutine getTDFunction !> Calculate charges, dipole moments subroutine getChargeDipole(this, deltaQ, qq, multipole, dipole, q0, rho, Ssqr, Dsqr, Qsqr,& - & coord, iSquare, eFieldScaling, qBlock, qNetAtom, errStatus) + & coord, iSquare, eFieldScaling, qBlock, qNetAtom, errStatus, & + & iNeighbour, nNeighbourSK, orb, iSparseStart, img2CentCell, env, ints) !> ElecDynamics instance - type(TElecDynamics), intent(in) :: this + type(TElecDynamics), intent(inout) :: this !> Negative gross charge real(dp), intent(out) :: deltaQ(:,:) @@ -1775,11 +1909,50 @@ subroutine getChargeDipole(this, deltaQ, qq, multipole, dipole, q0, rho, Ssqr, D !> Error status type(TStatus), intent(inout) :: errStatus + !> Atomic neighbour data + integer, intent(in) :: iNeighbour(0:,:) + + !> Number of neighbours for each of the atoms + integer, intent(in) :: nNeighbourSK(:) + + !> Atomic orbital information + type(TOrbitals), intent(in) :: orb + + !> index array for location of atomic blocks in large sparse arrays + integer, intent(in) :: iSparseStart(0:,:) + + !> image atoms to their equivalent in the central cell + integer, intent(in) :: img2CentCell(:) + + !> Environment settings + type(TEnvironment), intent(inout) :: env + + !> Integral container + type(TIntegral), intent(inout) :: ints + integer :: iAt, iSpin, iOrb1, iOrb2, nOrb, iKS, iK, ii + real(dp), allocatable :: tmp(:,:) qq(:,:,:) = 
0.0_dp + if (this%tRealHS) then + #:if WITH_SCALAPACK + this%rhoPrim(:,:) = 0.0_dp + allocate(tmp (size(rho,dim=1),size(rho,dim=2))) + do iSpin = 1, this%nSpin + tmp = real(rho(:,:,iSpin), dp) + call packRhoRealBlacs(env%blacs, this%denseDesc, tmp, iNeighbour, nNeighbourSK,& + & orb%mOrb, iSparseStart, img2CentCell, this%rhoPrim(:,iSpin)) + end do + deallocate(tmp) + call mpifx_allreduceip(env%mpi%globalComm, this%rhoPrim, MPI_SUM) + + do iSpin = 1, this%nSpin + call mulliken(env, qq(:,:,iSpin), ints%overlap, this%rhoPrim(:,iSpin), orb, iNeighbour,& + & nNeighbourSK, img2CentCell, iSparseStart) + end do + #:else do iSpin = 1, this%nSpin do iAt = 1, this%nAtom iOrb1 = iSquare(iAt) @@ -1789,6 +1962,7 @@ subroutine getChargeDipole(this, deltaQ, qq, multipole, dipole, q0, rho, Ssqr, D & rho(:,iOrb1:iOrb2,iSpin)*Ssqr(:,iOrb1:iOrb2,iSpin), dim=1), dp) end do end do + #:endif else @@ -1921,7 +2095,7 @@ end subroutine getChargeDipole !> Calculate energy - modify to include new way to calculate energy - !> Repulsive energy and dispersion energies must be calculated before calling this subroutine + !! Repulsive energy and dispersion energies must be calculated before calling this subroutine subroutine getTDEnergy(this, env, energy, rhoPrim, rho, neighbourList, nNeighbourSK, orb,& & iSquare, iSparseStart, img2CentCell, ham0, qq, q0, potential, chargePerShell, energyKin,& & tDualSpinOrbit, thirdOrd, solvation, hybridXc, qDepExtPot, qBlock, dftbU, xi,& @@ -2018,7 +2192,7 @@ subroutine getTDEnergy(this, env, energy, rhoPrim, rho, neighbourList, nNeighbou !> Error status type(TStatus), intent(inout) :: errStatus - real(dp), allocatable :: qiBlock(:,:,:,:) ! never allocated + real(dp), allocatable :: qiBlock(:,:,:,:), tmp(:,:) integer :: iKS, iK, iSpin real(dp) :: TS(this%nSpin) type(TReksCalc), allocatable :: reks ! never allocated @@ -2030,6 +2204,21 @@ subroutine getTDEnergy(this, env, energy, rhoPrim, rho, neighbourList, nNeighbou ! 
check allways that calcEnergy is called AFTER getForces if (.not. this%tForces) then rhoPrim(:,:) = 0.0_dp + + #:if WITH_SCALAPACK + do iKS = 1, this%parallelKS%nLocalKS + iSpin = this%parallelKS%localKS(2, iKS) + if (this%tRealHS) then + allocate(tmp (size(rho,dim=1),size(rho,dim=2))) + tmp = real(rho(:,:,iSpin), dp) + call packRhoRealBlacs(env%blacs, this%denseDesc, tmp, neighbourlist%iNeighbour,& + & nNeighbourSK, orb%mOrb, iSparseStart, img2CentCell, rhoPrim(:,iSpin)) + deallocate(tmp) + ! TODO: add here the case for complex Hamiltonian + end if + end do + call mpifx_allreduceip(env%mpi%globalComm, rhoPrim, MPI_SUM) + #:else do iKS = 1, this%parallelKS%nLocalKS iSpin = this%parallelKS%localKS(2, iKS) if (this%tRealHS) then @@ -2042,6 +2231,8 @@ subroutine getTDEnergy(this, env, energy, rhoPrim, rho, neighbourList, nNeighbou & iSquare, iSparseStart, img2CentCell) end if end do + #:endif + end if call ud2qm(rhoPrim) @@ -2067,15 +2258,18 @@ end subroutine getTDEnergy !> Create all necessary matrices and instances for dynamics - subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, ham0, Dsqr, Qsqr,& - & ints, eigvecsReal, filling, orb, rhoPrim, potential, iNeighbour, nNeighbourSK, iSquare,& - & iSparseStart, img2CentCell, Eiginv, EiginvAdj, energy, ErhoPrim, qBlock, qNetAtom, isDftbU,& - & onSiteElements, eigvecsCplx, HSqrCplxCam, bondWork, fdBondEnergy, fdBondPopul,& + subroutine initializeTDVariables(this, env, densityMatrix, rho, H1, Ssqr, Sinv, H0, ham0, Dsqr,& + & Qsqr, ints, eigvecsReal, filling, orb, rhoPrim, potential, iNeighbour, nNeighbourSK,& + & iSquare, iSparseStart, img2CentCell, Eiginv, EiginvAdj, energy, ErhoPrim, qBlock, qNetAtom,& + & isDftbU, onSiteElements, eigvecsCplx, HSqrCplxCam, bondWork, fdBondEnergy, fdBondPopul,& & lastBondPopul, time, errStatus) !> ElecDynamics instance type(TElecDynamics), intent(inout) :: this + !> Environment settings + type(TEnvironment), intent(in) :: env + !> Holds density matrix 
generation settings and real-space delta density matrix type(TDensityMatrix), intent(in) :: densityMatrix @@ -2186,8 +2380,11 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h real(dp), allocatable :: T2(:,:), T3(:,:) complex(dp), allocatable :: T4(:,:) - integer :: iSpin, iOrb, iOrb2, iKS, iK + integer :: iSpin, iOrb, iOrb2, iKS, iK, nLocalRows, nLocalCols type(TFileDescr) :: fillingsIn + #:if WITH_SCALAPACK + integer :: desc(DLEN_), nn + #:endif allocate(rhoPrim(size(ints%hamiltonian, dim=1), this%nSpin)) allocate(ErhoPrim(size(ints%hamiltonian, dim=1))) @@ -2195,16 +2392,46 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h allocate(ham0(size(H0))) ham0(:) = H0 + #:if WITH_SCALAPACK + nLocalRows = size(eigvecsReal, dim=1) + nLocalCols = size(eigvecsReal, dim=2) + #:else + nLocalRows = this%denseDesc%fullSize + nLocalCols = this%denseDesc%fullSize + #:endif + if (this%tRealHS) then - allocate(T2(this%nOrbs,this%nOrbs)) - allocate(T3(this%nOrbs, this%nOrbs)) + allocate(T2(nLocalRows, nLocalCols)) + allocate(T3(nLocalRows, nLocalCols)) else - allocate(T4(this%nOrbs,this%nOrbs)) + allocate(T4(nLocalRows, nLocalCols)) end if if (.not. this%tReadRestart) then Ssqr(:,:,:) = 0.0_dp Sinv(:,:,:) = 0.0_dp + + #:if WITH_SCALAPACK + do iKS = 1, this%parallelKS%nLocalKS + if (this%tRealHS) then + call unpackHSRealBlacs(env%blacs, ints%overlap, iNeighbour, nNeighbourSK, iSparseStart,& + & img2CentCell, this%denseDesc, T2) + Ssqr(:,:,iKS) = cmplx(T2, 0, dp) + call psymmatinv(this%denseDesc%blacsOrbSqr, T2, errStatus) + Sinv(:,:,iKS) = cmplx(T2, 0, dp) + + ! 
symmetrization needed for calculation of populations + nn = this%denseDesc%fullSize + call scalafx_getdescriptor(env%blacs%orbitalGrid, nn, nn, env%blacs%rowBlockSize,& + & env%blacs%columnBlockSize, desc) + call adjointLowerTriangle_BLACS(desc, env%blacs%orbitalGrid%myCol,& + & env%blacs%orbitalGrid%myRow, env%blacs%orbitalGrid%nCol,& + & env%blacs%orbitalGrid%nRow, Sinv(:,:,iKS)) + + ! TODO: add here the complex case + end if + end do + #:else do iKS = 1, this%parallelKS%nLocalKS if (this%tRealHS) then call unpackHS(T2, ints%overlap, iNeighbour, nNeighbourSK, iSquare, iSparseStart,& @@ -2220,20 +2447,35 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h else iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) - T4(:,:) = cmplx(0,0,dp) + T4(:,:) = cmplx(0, 0, kind=dp) call unpackHS(T4, ints%overlap, this%kPoint(:,iK), iNeighbour, nNeighbourSK,& & this%iCellVec, this%cellVec, iSquare, iSparseStart, img2CentCell) call adjointLowerTriangle(T4) Ssqr(:,:,iKS) = T4 - Sinv(:,:,iKS) = cmplx(0,0,dp) + Sinv(:,:,iKS) = cmplx(0, 0, dp) do iOrb = 1, this%nOrbs Sinv(iOrb, iOrb, iKS) = 1.0_dp end do call gesv(T4, Sinv(:,:,iKS)) end if end do + #:endif + write(stdOut,"(A)")'S inverted' + + #:if WITH_SCALAPACK + do iKS = 1, this%parallelKS%nLocalKS + iK = this%parallelKS%localKS(1, iKS) + iSpin = this%parallelKS%localKS(2, iKS) + if (this%tRealHS) then + call unpackHSRealBlacs(env%blacs, ints%hamiltonian(:,iSpin), iNeighbour, nNeighbourSK,& + & iSparseStart, img2CentCell, this%denseDesc, T3) + H1(:,:,iKS) = cmplx(T3, 0, dp) + ! 
TODO: add here the complex case + end if + end do + #:else do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) @@ -2248,21 +2490,21 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h call adjointLowerTriangle(H1(:,:,iKS)) end if end do + #:endif - call updateDQ(this, ints, iNeighbour, nNeighbourSK, img2CentCell, iSquare,& - & iSparseStart, Dsqr, Qsqr) + call updateDQ(this, ints, iNeighbour, nNeighbourSK, img2CentCell, iSquare, iSparseStart,& + & Dsqr, Qsqr) end if if (this%tPopulations) then - allocate(Eiginv(this%nOrbs, this%nOrbs, this%parallelKS%nLocalKS)) - allocate(EiginvAdj(this%nOrbs, this%nOrbs, this%parallelKS%nLocalKS)) + allocate(Eiginv(nLocalRows, nLocalCols, this%parallelKS%nLocalKS)) + allocate(EiginvAdj(nLocalRows, nLocalCols, this%parallelKS%nLocalKS)) do iKS = 1, this%parallelKS%nLocalKS if (this%tRealHS) then call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), eigvecsReal(:,:,iKS)) else - call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), & - & eigvecsCplx=eigvecsCplx(:,:,iKS)) + call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), eigvecsCplx=eigvecsCplx(:,:,iKS)) end if end do end if @@ -2280,6 +2522,18 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h if (.not.this%tReadRestart) then rho(:,:,:) = 0.0_dp + #:if WITH_SCALAPACK + do iKS = 1, this%parallelKS%nLocalKS + iK = this%parallelKS%localKS(1, iKS) + iSpin = this%parallelKS%localKS(2, iKS) + if (this%tRealHS) then + call makeDensityMtxRealBlacs(env%blacs%orbitalGrid, this%denseDesc%blacsOrbSqr,& + & filling(:,1,iSpin), eigvecsReal(:,:,iKS), T2) + rho(:,:,iKS) = cmplx(T2, 0, kind=dp) + ! 
TODO: add here the complex case + end if + end do + #:else do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) @@ -2300,10 +2554,10 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h end do end do end do + #:endif end if - call TPotentials_init(potential, orb, this%nAtom, this%nSpin, & - & this%nDipole, this%nQuadrupole) + call TPotentials_init(potential, orb, this%nAtom, this%nSpin, this%nDipole, this%nQuadrupole) call TEnergies_init(energy, this%nAtom, this%nSpin) if (isDftbU .or. allocated(onSiteElements)) then @@ -2315,26 +2569,26 @@ subroutine initializeTDVariables(this, densityMatrix, rho, H1, Ssqr, Sinv, H0, h end if if (this%isHybridXc) then - allocate(HSqrCplxCam(this%nOrbs, this%nOrbs)) + allocate(HSqrCplxCam(nLocalRows, nLocalCols)) end if if (this%tBondE .or. this%tBondP) then allocate(bondWork(this%nAtom, this%nAtom)) end if if (this%tBondE) then - call openOutputFile(this, fdBondEnergy, 'bondenergy.bin', isBinary = .true.) + call openOutputFile(this, env, fdBondEnergy, 'bondenergy.bin', isBinary=.true.) end if if (this%tBondP) then - call openOutputFile(this, fdBondPopul, 'bondpop.bin', isBinary = .true.) + call openOutputFile(this, env, fdBondPopul, 'bondpop.bin', isBinary=.true.) end if call getBondPopulAndEnergy(this, bondWork, lastBondPopul, rhoPrim, ham0, ints, iNeighbour,& - & nNeighbourSK, iSparseStart, img2CentCell, iSquare, fdBondEnergy, fdBondPopul, time) + & nNeighbourSK, iSparseStart, img2CentCell, iSquare, fdBondEnergy, fdBondPopul, time, env) end subroutine initializeTDVariables !> Performs a step backwards to boot the dynamics using the Euler algorithm. - !> Output is rho(deltaT) called rhoNew, input is rho(t=0) (ground state) called rho + !! 
Output is rho(deltaT) called rhoNew, input is rho(t=0) (ground state) called rho subroutine initializePropagator(this, env, step, rho, rhoNew, H1, Sinv, coordAll, skOverCont,& & orb, neighbourList, nNeighbourSK, img2CentCell, iSquare) @@ -2400,12 +2654,21 @@ subroutine initializePropagator(this, env, step, rho, rhoNew, H1, Sinv, coordAll do iKS = 1, this%parallelKS%nLocalKS if (this%tIons .or. (.not. this%tRealHS)) then H1(:,:,iKS) = RdotSprime + imag * H1(:,:,iKS) + #:if WITH_SCALAPACK + call propagateRhoBlacs(this, rhoNew(:,:,iKS), rho(:,:,iKS), H1(:,:,iKS), Sinv(:,:,iKS),& + & step) + #:else call propagateRho(this, rhoNew(:,:,iKS), rho(:,:,iKS), H1(:,:,iKS), Sinv(:,:,iKS), step) + #:endif else - ! The following line is commented to make the fast propagate work since it needs a real H - !H1(:,:,iKS) = imag * H1(:,:,iKS) + #:if WITH_SCALAPACK + H1(:,:,iKS) = imag * H1(:,:,iKS) + call propagateRhoBlacs(this, rhoNew(:,:,iKS), rho(:,:,iKS), H1(:,:,iKS),& + & Sinv(:,:,iKS), step) + #:else call propagateRhoRealH(this, rhoNew(:,:,iKS), rho(:,:,iKS), H1(:,:,iKS), Sinv(:,:,iKS),& & step) + #:endif end if end do @@ -2434,6 +2697,7 @@ subroutine propagateRho(this, rhoOld, rho, H1, Sinv, step) real(dp), intent(in) :: step complex(dp), allocatable :: T1(:,:) + allocate(T1(this%nOrbs,this%nOrbs)) T1(:,:) = 0.0_dp @@ -2443,6 +2707,48 @@ subroutine propagateRho(this, rhoOld, rho, H1, Sinv, step) end subroutine propagateRho +#:if WITH_SCALAPACK + !> Propagate rho, notice that H = iH (coefficients are real) + subroutine propagateRhoBlacs(this, rhoOld, rho, H1, Sinv, step) + + !> ElecDynamics instance + type(TElecDynamics), intent(inout) :: this + + !> Density matrix at previous step + complex(dp), intent(inout) :: rhoOld(:,:) + + !> Density matrix + complex(dp), intent(in) :: rho(:,:) + + !> Square imaginary hamiltonian plus non-adiabatic contribution + complex(dp), intent(in) :: H1(:,:) + + !> Square overlap inverse + complex(dp), intent(in) :: Sinv(:,:) + + !> Time step in 
atomic units + real(dp), intent(in) :: step + + complex(dp), allocatable :: T1(:,:) + + integer :: nLocalCols, nLocalRows + + nLocalRows = size(rho, dim=1) + nLocalCols = size(rho, dim=2) + allocate(T1(nLocalRows, nLocalCols)) + + call pblasfx_pgemm(Sinv, this%denseDesc%blacsOrbSqr, H1, this%denseDesc%blacsOrbSqr,& + & T1, this%denseDesc%blacsOrbSqr) + + call pblasfx_pgemm(T1, this%denseDesc%blacsOrbSqr, rho, this%denseDesc%blacsOrbSqr,& + & rhoOld, this%denseDesc%blacsOrbSqr, alpha=cmplx(-step, 0, dp), beta=cmplx(1, 0, dp)) + + call pblasfx_pgemm(rho, this%denseDesc%blacsOrbSqr, T1, this%denseDesc%blacsOrbSqr,& + & rhoOld, this%denseDesc%blacsOrbSqr, alpha=cmplx(-step, 0, dp), beta=cmplx(1, 0, dp),& + & transa='N', transb='C') + + end subroutine propagateRhoBlacs +#:endif !> Propagate rho for real Hamiltonian (used for frozen nuclei dynamics and gamma point periodic) subroutine propagateRhoRealH(this, rhoOld, rho, H1, Sinv, step) @@ -2465,7 +2771,7 @@ subroutine propagateRhoRealH(this, rhoOld, rho, H1, Sinv, step) !> Time step in atomic units real(dp), intent(in) :: step - real(dp), allocatable :: T1R(:,:), T2R(:,:), T3R(:,:),T4R(:,:) + real(dp), allocatable :: T1R(:,:), T2R(:,:), T3R(:,:), T4R(:,:) allocate(T1R(this%nOrbs,this%nOrbs)) allocate(T2R(this%nOrbs,this%nOrbs)) @@ -2473,7 +2779,7 @@ subroutine propagateRhoRealH(this, rhoOld, rho, H1, Sinv, step) allocate(T4R(this%nOrbs,this%nOrbs)) ! The code below takes into account that Sinv and H1 are real, this is twice as fast as the - ! original above (propageteRho) + ! original above (propagateRho) ! 
get the real part of Sinv and H1 T1R(:,:) = real(H1, dp) @@ -2501,12 +2807,15 @@ end subroutine propagateRhoRealH !> Initialize output files - subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, coorDat,& + subroutine initTDOutput(this, env, dipoleDat, qDat, energyDat, populDat, forceDat, coorDat,& & atomEnergyDat) !> ElecDynamics instance type(TElecDynamics), intent(in) :: this + !> Environment + type(TEnvironment), intent(in) :: env + !> Dipole output file ID type(TFileDescr), intent(out) :: dipoleDat @@ -2534,6 +2843,9 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co integer :: iSpin, iKS, iK, iErr if (.not. this%tVerboseDyn) return + #:if WITH_MPI + if (.not. env%mpi%tGlobalLead) return + #:endif if (this%tKick) then if (this%currPolDir == 1) then @@ -2546,7 +2858,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co else dipoleFileName = 'mu.dat' end if - call openOutputFile(this, dipoleDat, dipoleFileName) + call openOutputFile(this, env, dipoleDat, dipoleFileName) write(dipoleDat%unit, "(A)", advance = "NO")"# time (fs) |" select case(this%nSpin) @@ -2565,7 +2877,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co write(dipoleDat%unit, "(A)") if (this%tdWriteExtras) then - call openOutputFile(this, qDat, 'qsvst.dat') + call openOutputFile(this, env, qDat, 'qsvst.dat') write(qDat%unit, "(A)", advance = "NO")"# time (fs) |" write(qDat%unit, "(A)", advance = "NO")" total net charge (e) |" write(qDat%unit, "(A)", advance = "NO")" charge (atom_1) (e) |" @@ -2574,7 +2886,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co write(qDat%unit, "(A)", advance = "NO")" charge (atom_N) (e) |" write(qDat%unit, "(A)") - call openOutputFile(this, energyDat, 'energyvst.dat') + call openOutputFile(this, env, energyDat, 'energyvst.dat') write(energyDat%unit, "(A)", advance = "NO")"# time (fs) |" write(energyDat%unit, 
"(A)", advance = "NO")" E total (H) |" write(energyDat%unit, "(A)", advance = "NO")" E non-SCC (H) |" @@ -2587,7 +2899,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co write(energyDat%unit, "(A)") if (this%tForces) then - call openOutputFile(this, forceDat, 'forcesvst.dat') + call openOutputFile(this, env, forceDat, 'forcesvst.dat') write(forceDat%unit, "(A)", advance = "NO")"# time (fs) |" write(forceDat%unit, "(A)", advance = "NO")& & " force (atom_1) (H/b) | force (atom_2) (H/b) |" @@ -2597,7 +2909,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co end if if (this%tIons) then - call openOutputFile(this, coorDat, 'tdcoords.xyz') + call openOutputFile(this, env, coorDat, 'tdcoords.xyz') end if end if @@ -2606,13 +2918,13 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co iSpin = this%parallelKS%localKS(2, iKS) write(strSpin,'(i1)')iSpin if (this%tRealHS) then - call openOutputFile(this, populDat(iKS), 'molpopul' // trim(strSpin) // '.dat') + call openOutputFile(this, env, populDat(iKS), 'molpopul' // trim(strSpin) // '.dat') write(populDat(iKS)%unit, "(A,A)")& & "# GS molecular orbital populations, spin channel : ", trim(strSpin) else iK = this%parallelKS%localKS(1, iKS) write(strK,'(i0.3)')iK - call openOutputFile(this, populDat(iKS),& + call openOutputFile(this, env, populDat(iKS),& & 'molpopul' // trim(strSpin) // '-' // trim(strK) // '.dat') write(populDat(iKS)%unit, "(A,A,A,A)")& & "# GS molecular orbital populations, spin channel : ", trim(strSpin), ",& @@ -2636,7 +2948,7 @@ subroutine initTDOutput(this, dipoleDat, qDat, energyDat, populDat, forceDat, co end if if (this%tWriteAtomEnergies) then - call openOutputFile(this, atomEnergyDat, 'atomenergies.dat') + call openOutputFile(this, env, atomEnergyDat, 'atomenergies.dat') write(atomEnergyDat%unit, "(A)", advance = "NO")"# time (fs) |" write(atomEnergyDat%unit, "(A)", advance = "NO")" E total (H) |" 
write(atomEnergyDat%unit, "(A)", advance = "NO")" E (atom_1) (H) |" @@ -2650,13 +2962,20 @@ end subroutine initTDOutput !> Close output files - subroutine closeTDOutputs(this) + subroutine closeTDOutputs(this, env) !> ElecDynamics instance type(TElecDynamics), intent(inout) :: this + !> Environment + type(TEnvironment), intent(in) :: env + if (.not. this%tVerboseDyn) return +#:if WITH_MPI + if (.not. env%mpi%tGlobalLead) return +#:endif + call closeFile(this%dipoleDat) call closeFile(this%qDat) call closeFile(this%energyDat) @@ -2671,11 +2990,14 @@ end subroutine closeTDOutputs !> Open files in different ways depending on their previous existence - subroutine openOutputFile(this, fileDescr, fileName, isBinary) + subroutine openOutputFile(this, env, fileDescr, fileName, isBinary) !> ElecDynamics instance type(TElecDynamics), intent(in) :: this + !> Environment + type(TEnvironment), intent(in) :: env + !> File descriptor type(TFileDescr), intent(out) :: fileDescr @@ -2696,6 +3018,9 @@ subroutine openOutputFile(this, fileDescr, fileName, isBinary) logical :: isBinary_ if (.not. this%tVerboseDyn) return +#:if WITH_MPI + if (.not. env%mpi%tGlobalLead) return +#:endif if (present(isBinary)) then isBinary_ = isBinary @@ -2727,13 +3052,16 @@ end subroutine openOutputFile !> Write results to file - subroutine writeTDOutputs(this, dipoleDat, qDat, energyDat, forceDat, coorDat, fdBondPopul,& + subroutine writeTDOutputs(this, env, dipoleDat, qDat, energyDat, forceDat, coorDat, fdBondPopul,& & fdBondEnergy, atomEnergyDat, time, energy, energyKin, dipole, deltaQ, coord, totalForce,& & iStep) !> ElecDynamics instance type(TElecDynamics), intent(in) :: this + !> Environment + type(TEnvironment), intent(in) :: env + !> Data type for energy components and total type(TEnergies), intent(in) :: energy @@ -2786,6 +3114,9 @@ subroutine writeTDOutputs(this, dipoleDat, qDat, energyDat, forceDat, coorDat, f integer :: iAtom, iSpin, iDir if (.not. 
this%tVerboseDyn) return +#:if WITH_MPI + if (.not. env%mpi%tGlobalLead) return +#:endif write(dipoleDat%unit, '(7F25.15)') time * au__fs, ((dipole(iDir, iSpin) * Bohr__AA, iDir=1, 3),& & iSpin=1, this%nSpin) @@ -2881,13 +3212,51 @@ subroutine tdPopulInit(this, Eiginv, EiginvAdj, eigvecsReal, eigvecsCplx) !> Complex Eigevenctors complex(dp), intent(in), optional :: eigvecsCplx(:,:) - complex(dp), allocatable :: T2(:,:), T3(:,:) + complex(dp), allocatable :: T1(:,:), T2(:,:), T3(:,:) + integer, allocatable :: ipiv(:) + integer :: mm, nn integer :: iOrb + integer :: nLocalCols, nLocalRows, i, j, unit_num + + #:if WITH_SCALAPACK + nLocalRows = size(eigvecsReal, dim=1) + nLocalCols = size(eigvecsReal, dim=2) + #:else + nLocalRows = this%denseDesc%fullSize + nLocalCols = this%denseDesc%fullSize + #:endif + + allocate(T1(nLocalRows, nLocalCols)) + allocate(T2(nLocalRows, nLocalCols)) + allocate(T3(nLocalRows, nLocalCols)) - allocate(T2(this%nOrbs, this%nOrbs)) - allocate(T3(this%nOrbs, this%nOrbs)) + #:if WITH_SCALAPACK if (this%tRealHS) then - T2(:,:) = cmplx(eigvecsReal, kind=dp) + T2(:,:) = cmplx(eigvecsReal, 0, dp) + end if + + ! invert eigvecsReal with pgetrf and pgetri + mm = this%denseDesc%blacsOrbSqr(M_) + nn = this%denseDesc%blacsOrbSqr(N_) + allocate(ipiv(min(mm,nn))) + ipiv = 0 + call scalafx_pgetrf(T2, this%denseDesc%blacsOrbSqr, ipiv) + call scalafx_pgetri(T2, this%denseDesc%blacsOrbSqr, ipiv) + Eiginv(:,:) = T2 + + if (this%tRealHS) then + T1 = cmplx(eigvecsReal, kind=dp) + call pblasfx_ptranu(T1, this%denseDesc%blacsOrbSqr, T2, this%denseDesc%blacsOrbSqr) + end if + + ! 
invert adjoint(eigvecsReal) with pgetrf and pgetri + ipiv = 0 + call scalafx_pgetrf(T2, this%denseDesc%blacsOrbSqr, ipiv) + call scalafx_pgetri(T2, this%denseDesc%blacsOrbSqr, ipiv) + EiginvAdj(:,:) = T2 + #:else + if (this%tRealHS) then + T2 = cmplx(eigvecsReal, kind=dp) else T2(:,:) = eigvecsCplx end if @@ -2909,6 +3278,7 @@ subroutine tdPopulInit(this, Eiginv, EiginvAdj, eigvecsReal, eigvecsCplx) end do call gesv(T2, T3) EiginvAdj(:,:) = T3 + #:endif end subroutine tdPopulInit @@ -2962,22 +3332,25 @@ subroutine updateBasisMatrices(this, env, electronicSolver, Eiginv, EiginvAdj, H @:PROPAGATE_ERROR(errStatus) if (this%tRealHS) then T2(:,:) = real(T1, dp) - call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), T2) + call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), eigVecsReal=T2) else call tdPopulInit(this, Eiginv(:,:,iKS), EiginvAdj(:,:,iKS), eigvecsCplx=T1) end if end do - deallocate(T1, T2) end subroutine updateBasisMatrices !> Calculate populations at each time step - subroutine getTDPopulations(this, occ, rho, Eiginv, EiginvAdj, populDat, time, iKS) + subroutine getTDPopulations(this, env, occ, rho, Eiginv, EiginvAdj, populDat, time, iKS,& + & iNeighbour, nNeighbourSK, mOrb, iSparseStart, img2CentCell, rhoPrim) !> ElecDynamics instance type(TElecDynamics), intent(in) :: this + !> Environment settings + type(TEnvironment), intent(in) :: env + !> Density Matrix complex(dp), intent(in) :: rho(:,:,:) @@ -3000,12 +3373,62 @@ subroutine getTDPopulations(this, occ, rho, Eiginv, EiginvAdj, populDat, time, i real(dp), intent(inout) :: occ(:) !> Auxiliary matrix - complex(dp) :: T1(this%nOrbs,this%nOrbs) + complex(dp), allocatable :: T1(:,:), T11(:,:), T2(:,:), T3(:,:) - integer :: ii + !> Neighbour list for the atoms (First index from 0!) + integer, intent(in) :: iNeighbour(0:, :) - if (.not. this%tVerboseDyn) return + !> Nr. of neighbours for the atoms. + integer, intent(in) :: nNeighbourSK(:) + + !> Maximal number of orbitals on an atom. 
+ integer, intent(in) :: mOrb + + !> indexing array for the sparse Hamiltonian + integer, intent(in) :: iSparseStart(0:, :) + + !> Mapping between image atoms and corresponding atom in the central cell. + integer, intent(in) :: img2CentCell(:) + !> Sparse density matrix + real(dp), allocatable, intent(inout) :: rhoPrim(:,:) + + integer :: nLocalCols, nLocalRows, ii, i + + if (.not. this%tVerboseDyn) return + + #:if WITH_SCALAPACK + nLocalRows = size(Eiginv, dim=1) + nLocalCols = size(Eiginv, dim=2) + #:else + nLocalRows = this%denseDesc%fullSize + nLocalCols = this%denseDesc%fullSize + #:endif + allocate(T1(nLocalRows, nLocalCols)) + allocate(T3(nLocalRows, nLocalCols)) + + #:if WITH_SCALAPACK + if (this%tRealHS) then + ! T3 = rho*EiginvAdj + call pblasfx_pgemm(rho(:,:,iKS), this%denseDesc%blacsOrbSqr, EiginvAdj(:,:,iKS),& + & this%denseDesc%blacsOrbSqr, T3, this%denseDesc%blacsOrbSqr, transa="N", transb="N") + ! T1 = Trans(Eiginv)*rho*EiginvAdj + call pblasfx_pgemm(Eiginv(:,:,iKS), this%denseDesc%blacsOrbSqr, T3,& + & this%denseDesc%blacsOrbSqr, T1, this%denseDesc%blacsOrbSqr, transa="N", transb="N") + end if + + ! 
get occupations from distributed matrix + call unpackTDpopulBlacs(iNeighbour, nNeighbourSK, mOrb, iSparseStart, img2CentCell,& + & real(T1, dp), rhoPrim, env, occ, this, iKS, this%denseDesc) + + if (env%mpi%tGlobalLead) then + write(populDat(iKS)%unit,'(*(2x,F25.15))', advance='no') time * au__fs + do ii = 1, size(occ) + write(populDat(iKS)%unit,'(*(2x,F25.15))', advance='no')occ(ii) + end do + write(populDat(iKS)%unit,*) + end if + #:else call gemm(T1, rho(:,:,iKS), EiginvAdj(:,:,iKS)) T1 = transpose(Eiginv(:,:,iKS)) * T1 @@ -3014,7 +3437,8 @@ subroutine getTDPopulations(this, occ, rho, Eiginv, EiginvAdj, populDat, time, i do ii = 1, size(occ) write(populDat(iKS)%unit,'(*(2x,F25.15))', advance='no')occ(ii) end do - write(populDat(iKS)%unit,*) + write(populDat(iKS)%unit, *) + #:endif end subroutine getTDPopulations @@ -3123,8 +3547,7 @@ subroutine initIonDynamics(this, coordNew, coord) ! Velocities should actually be v(t+0.5*dt), not v(t), ! like this: this%movedVelo(:,:) = this%movedVelo + 0.5_dp * movedAccel * this%dt coordNew(:,:) = coord - coordNew(:,this%indMovedAtom) = coordNew(:,this%indMovedAtom) & - & + this%movedVelo(:,:) * this%dt + coordNew(:,this%indMovedAtom) = coordNew(:,this%indMovedAtom) + this%movedVelo * this%dt ! 
This re-initializes the velocity Verlet propagator with coordNew if (this%nDynamicsInit == 0) then @@ -3144,7 +3567,7 @@ end subroutine initIonDynamics !> Calculates non-SCC hamiltonian and overlap for new geometry and reallocates sparse arrays subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, nNeighbourSK,& - & iSparseStart, img2CentCell, iSquare, coordAll, Sinv, Ssqr, ham0, Dsqr, Qsqr) + & iSparseStart, img2CentCell, iSquare, coordAll, Sinv, Ssqr, ham0, errStatus, Dsqr, Qsqr) !> ElecDynamics instance type(TElecDynamics), intent(inout), target :: this @@ -3191,6 +3614,9 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, !> Local sparse storage for non-SCC hamiltonian real(dp), intent(inout), allocatable :: ham0(:) + !> Error status + type(TStatus), intent(inout) :: errStatus + !> Square dipole matrix complex(dp), intent(inout), optional :: Dsqr(:,:,:,:) @@ -3198,8 +3624,14 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, complex(dp), intent(inout), optional :: Qsqr(:,:,:,:) real(dp), allocatable :: SSqrReal(:,:), SinvReal(:,:) + complex(dp), allocatable :: T4(:,:) - integer :: iSpin, iOrb, iKS, iK + + integer :: iSpin, iOrb, iKS, iK, nLocalRows, nLocalCols + + #:if WITH_SCALAPACK + integer :: desc(DLEN_), nn + #:endif select case(this%hamiltonianType) case default @@ -3216,8 +3648,44 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, & ints%dipoleBra, ints%dipoleKet, ints%quadrupoleBra, ints%quadrupoleKet) end select + ! 
TODO: modify this routine to enable restart and ion dynamics with MPI + + #:if WITH_SCALAPACK + nLocalRows = size(Sinv, dim=1) + nLocalCols = size(Sinv, dim=2) + #:else + nLocalRows = this%denseDesc%fullSize + nLocalCols = this%denseDesc%fullSize + #:endif + + if (this%tRealHS) then + allocate(SSqrReal(nLocalRows, nLocalCols)) + allocate(SinvReal(nLocalRows, nLocalCols)) + end if + + #:if WITH_SCALAPACK if (this%tRealHS) then - allocate(SSqrReal(this%nOrbs,this%nOrbs), source=0.0_dp) + do iKS = 1, this%parallelKS%nLocalKS + SSqrReal(:,:) = 0.0_dp + call unpackHSRealBlacs(env%blacs, ints%overlap, neighbourList%iNeighbour, nNeighbourSK,& + & iSparseStart, img2CentCell, this%denseDesc, SSqrReal) + Ssqr(:,:,iKS) = cmplx(SSqrReal, 0, dp) + + call psymmatinv(this%denseDesc%blacsOrbSqr, SSqrReal, errStatus) + Sinv(:,:,iKS) = cmplx(SSqrReal, 0, dp) + + nn = this%denseDesc%fullSize + call scalafx_getdescriptor(env%blacs%orbitalGrid, nn, nn, env%blacs%rowBlockSize,& + & env%blacs%columnBlockSize, desc) + call adjointLowerTriangle_BLACS(desc, env%blacs%orbitalGrid%myCol,& + & env%blacs%orbitalGrid%myRow, env%blacs%orbitalGrid%nCol, env%blacs%orbitalGrid%nRow,& + & Sinv(:,:,iKS)) + end do + ! 
TODO: add here complex overlap matrix with blacs + end if + #:else + if (this%tRealHS) then + SSqrReal(:,:) = 0.0_dp call unpackHS(SSqrReal, ints%overlap, neighbourList%iNeighbour, nNeighbourSK, iSquare,& & iSparseStart, img2CentCell) call adjointLowerTriangle(SSqrReal) @@ -3225,7 +3693,7 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, Ssqr(:,:,iKS) = cmplx(SSqrReal, 0, dp) end do - allocate(SinvReal(this%nOrbs,this%nOrbs), source=0.0_dp) + SinvReal(:,:) = 0.0_dp do iOrb = 1, this%nOrbs SinvReal(iOrb, iOrb) = 1.0_dp end do @@ -3234,11 +3702,8 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, do iKS = 1, this%parallelKS%nLocalKS Sinv(:,:,iKS) = cmplx(SinvReal, 0, dp) end do - else - allocate(T4(this%nOrbs,this%nOrbs)) - Ssqr(:,:,:) = cmplx(0, 0, dp) do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) @@ -3253,9 +3718,8 @@ subroutine updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, end do call gesv(T4, Sinv(:,:,iKS)) end do - deallocate(T4) - end if + #:endif call updateDQ(this, ints, neighbourList%iNeighbour, nNeighbourSK, img2CentCell, iSquare,& & iSparseStart, Dsqr, Qsqr) @@ -3304,17 +3768,17 @@ subroutine updateDQ(this, ints, iNeighbour, nNeighbourSK, img2CentCell, iSquare, do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) - call unpackDQ(M3, ints%dipoleBra, ints%dipoleKet, iNeighbour,& - & nNeighbourSK, iSquare, iSparseStart, img2CentCell) + call unpackDQ(M3, ints%dipoleBra, ints%dipoleKet, iNeighbour, nNeighbourSK, iSquare,& + & iSparseStart, img2CentCell) Dsqr(:,:,:,iKS) = cmplx(M3, 0, dp) end do else do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) - call unpackDQ(Dsqr(:,:,:,iKS), ints%dipoleBra, ints%dipoleKet,& - & this%kPoint(:,iK), iNeighbour, nNeighbourSK, this%iCellVec, 
this%cellVec,& - & iSquare, iSparseStart, img2CentCell) + call unpackDQ(Dsqr(:,:,:,iKS), ints%dipoleBra, ints%dipoleKet, this%kPoint(:,iK),& + & iNeighbour, nNeighbourSK, this%iCellVec, this%cellVec, iSquare, iSparseStart,& + & img2CentCell) end do end if end if @@ -3327,17 +3791,17 @@ subroutine updateDQ(this, ints, iNeighbour, nNeighbourSK, img2CentCell, iSquare, do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) - call unpackDQ(M3, ints%quadrupoleBra, ints%quadrupoleKet, iNeighbour,& - & nNeighbourSK, iSquare, iSparseStart, img2CentCell) + call unpackDQ(M3, ints%quadrupoleBra, ints%quadrupoleKet, iNeighbour, nNeighbourSK,& + & iSquare, iSparseStart, img2CentCell) Qsqr(:,:,:,iKS) = cmplx(M3, 0, dp) end do else do iKS = 1, this%parallelKS%nLocalKS iK = this%parallelKS%localKS(1, iKS) iSpin = this%parallelKS%localKS(2, iKS) - call unpackDQ(Qsqr(:,:,:,iKS), ints%quadrupoleBra, ints%quadrupoleKet,& - & this%kPoint(:,iK), iNeighbour, nNeighbourSK, this%iCellVec, this%cellVec,& - & iSquare, iSparseStart, img2CentCell) + call unpackDQ(Qsqr(:,:,:,iKS), ints%quadrupoleBra, ints%quadrupoleKet, this%kPoint(:,iK),& + & iNeighbour, nNeighbourSK, this%iCellVec, this%cellVec, iSquare, iSparseStart,& + & img2CentCell) end do end if end if @@ -3506,7 +3970,7 @@ subroutine getForces(this, movedAccel, totalForce, rho, H1, Sinv, neighbourList, end if if (allocated(this%sccCalc)) then call this%sccCalc%updateCharges(env, qq, orb, this%speciesAll, q0) - call this%sccCalc%addForceDc(env, derivs, this%speciesAll, neighbourList%iNeighbour, & + call this%sccCalc%addForceDc(env, derivs, this%speciesAll, neighbourList%iNeighbour,& & img2CentCell) end if if (allocated(repulsive)) then @@ -3540,7 +4004,7 @@ subroutine getForces(this, movedAccel, totalForce, rho, H1, Sinv, neighbourList, totalDeriv(:,:) = repulsiveDerivs + derivs if (allocated(this%dispersion)) then - call this%dispersion%addGradients(env, neighbourList, 
this%speciesAll, coordAll, & + call this%dispersion%addGradients(env, neighbourList, this%speciesAll, coordAll,& & img2CentCell, totalDeriv) end if @@ -3555,8 +4019,8 @@ end subroutine getForces !> Calculates nonadiabatic matrix: overlap gradient (Sprime) times velocities (Rdot) - subroutine getRdotSprime(this, RdotSprime, coordAll, skOverCont, orb, img2CentCell, & - &neighbourList, nNeighbourSK, iSquare) + subroutine getRdotSprime(this, RdotSprime, coordAll, skOverCont, orb, img2CentCell,& + & neighbourList, nNeighbourSK, iSquare) !> ElecDynamics instance type(TElecDynamics), intent(in), target :: this @@ -3595,8 +4059,8 @@ subroutine getRdotSprime(this, RdotSprime, coordAll, skOverCont, orb, img2CentCe sPrimeTmp(:,:,:) = 0.0_dp RdotSprime(:,:) = 0.0_dp - !$OMP PARALLEL DO PRIVATE(iAtom1,iStart1,iEnd1,iSp1,nOrb1,sPrimeTmp2,iNeigh,iAtom2, & - !$OMP& iAtom2f,iStart2,iEnd2,iSp2,nOrb2,sPrimeTmp,iDir) DEFAULT(SHARED) & + !$OMP PARALLEL DO PRIVATE(iAtom1,iStart1,iEnd1,iSp1,nOrb1,sPrimeTmp2,iNeigh,iAtom2,& + !$OMP& iAtom2f,iStart2,iEnd2,iSp2,nOrb2,sPrimeTmp,iDir) DEFAULT(SHARED)& !$OMP& SCHEDULE(RUNTIME) do iAtom1 = 1, this%nAtom iStart1 = iSquare(iAtom1) @@ -3751,7 +4215,7 @@ end subroutine getPositionDependentEnergy !> Calculates bond populations and bond energies if requested subroutine getBondPopulAndEnergy(this, bondWork, lastBondPopul, rhoPrim, ham0, ints, iNeighbour,& - & nNeighbourSK, iSparseStart, img2CentCell, iSquare, fdBondEnergy, fdBondPopul, time) + & nNeighbourSK, iSparseStart, img2CentCell, iSquare, fdBondEnergy, fdBondPopul, time, env) !> ElecDynamics instance type(TElecDynamics), intent(inout) :: this @@ -3795,6 +4259,9 @@ subroutine getBondPopulAndEnergy(this, bondWork, lastBondPopul, rhoPrim, ham0, i !> Elapsed simulation time real(dp), intent(in) :: time + !> Environment settings + type(TEnvironment), intent(in) :: env + integer :: iSpin if (this%tBondE) then @@ -3811,7 +4278,13 @@ subroutine getBondPopulAndEnergy(this, bondWork, lastBondPopul, 
rhoPrim, ham0, i call addPairWiseBondInfo(bondWork, rhoPrim(:,1), ints%overlap, iSquare,& & iNeighbour, nNeighbourSK, img2CentCell, iSparseStart) end do + #:if WITH_SCALAPACK + if (env%mpi%tGlobalLead) then + write(fdBondPopul%unit) time * au__fs, sum(bondWork), bondWork + end if + #:else write(fdBondPopul%unit) time * au__fs, sum(bondWork), bondWork + #:endif if (this%tWriteAutotest) then lastBondPopul = sum(bondWork) end if @@ -3989,6 +4462,7 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe !> Error status type(TStatus), intent(inout) :: errStatus + integer :: nLocalCols, nLocalRows real(dp), allocatable :: velInternal(:,:) this%startTime = 0.0_dp @@ -4017,32 +4491,46 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe end if call TMultipole_init(this%multipole, this%nAtom, this%nDipole, this%nQuadrupole, this%nSpin) - allocate(this%trho(this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) - allocate(this%trhoOld(this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) - allocate(this%Ssqr(this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) - allocate(this%Sinv(this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) + #:if WITH_SCALAPACK + nLocalRows = size(eigvecsReal, dim=1) + nLocalCols = size(eigvecsReal, dim=2) + #:else + nLocalRows = this%denseDesc%fullSize + nLocalCols = this%denseDesc%fullSize + #:endif + + allocate(this%trho(nLocalRows, nLocalCols,this%parallelKS%nLocalKS)) + allocate(this%trhoOld(nLocalRows, nLocalCols,this%parallelKS%nLocalKS)) + allocate(this%Ssqr(nLocalRows, nLocalCols,this%parallelKS%nLocalKS)) + allocate(this%Sinv(nLocalRows, nLocalCols,this%parallelKS%nLocalKS)) + allocate(this%H1(nLocalRows, nLocalCols,this%parallelKS%nLocalKS)) + allocate(this%RdotSprime(nLocalRows, nLocalCols)) + if (this%nDipole > 0) then allocate(this%Dsqr(this%nDipole,this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) end if if (this%nQuadrupole > 0) then 
allocate(this%Qsqr(this%nQuadrupole,this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) end if - allocate(this%H1(this%nOrbs,this%nOrbs,this%parallelKS%nLocalKS)) allocate(this%qq(orb%mOrb, this%nAtom, this%nSpin)) allocate(this%deltaQ(this%nAtom,this%nSpin)) allocate(this%dipole(3,this%nSpin)) allocate(this%chargePerShell(orb%mShell,this%nAtom,this%nSpin)) allocate(this%occ(this%nOrbs)) - allocate(this%RdotSprime(this%nOrbs,this%nOrbs)) allocate(this%totalForce(3, this%nAtom)) this%RdotSprime(:,:) = 0.0_dp this%totalForce(:,:) = 0.0_dp this%occ(:) = 0.0_dp if (this%tReadRestart) then + #:if WITH_SCALAPACK + call readRestartFileBlacs(this%trho, this%trhoOld, coord, this%movedVelo, this%time, this%dt,& + & restartFileName, env, this%denseDesc, this%parallelKS, errStatus) + #:else call readRestartFile(this%trho, this%trhoOld, coord, this%movedVelo, this%startTime, this%dt,& - & restartFileName, this%tRestartAscii, errStatus) + & restartFileName, this%tRestartAscii, errStatus) + #:endif @:PROPAGATE_ERROR(errStatus) call handleCoordinateChange(this, env, boundaryCond, hybridXc, ints, orb, neighbourList,& & nNeighbourSK, symNeighbourList, nNeighbourCamSym, coord, coordAll, this%ham0,& @@ -4050,7 +4538,7 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe @:PROPAGATE_ERROR(errStatus) call updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, nNeighbourSK,& & iSparseStart, img2CentCell, iSquare, coordAll, this%Sinv, this%Ssqr, this%ham0,& - & Dsqr=this%Dsqr, Qsqr=this%Qsqr) + & errStatus, Dsqr=this%Dsqr, Qsqr=this%Qsqr) @:PROPAGATE_ERROR(errStatus) if (this%tIons) then this%initialVelocities(:,:) = this%movedVelo @@ -4064,17 +4552,17 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe @:PROPAGATE_ERROR(errStatus) call updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, nNeighbourSK,& & iSparseStart, img2CentCell, iSquare, coordAll, this%Sinv, this%Ssqr, this%ham0,& - & 
Dsqr=this%Dsqr, Qsqr=this%Qsqr) + & errStatus, Dsqr=this%Dsqr, Qsqr=this%Qsqr) @:PROPAGATE_ERROR(errStatus) this%initialVelocities(:,:) = this%movedVelo this%ReadMDVelocities = .true. end if if (this%tLaser .and. .not. this%tdFieldThroughAPI .and. this%iCall == 1) then - call getTDFunction(this, this%startTime) + call getTDFunction(this, env, this%startTime) end if - call initializeTDVariables(this, densityMatrix, this%trho, this%H1, this%Ssqr, this%Sinv, H0,& - & this%ham0, this%Dsqr, this%Qsqr, ints, eigvecsReal, filling, orb, this%rhoPrim,& + call initializeTDVariables(this, env, densityMatrix, this%trho, this%H1, this%Ssqr, this%Sinv,& + & H0, this%ham0, this%Dsqr, this%Qsqr, ints, eigvecsReal, filling, orb, this%rhoPrim,& & this%potential, neighbourList%iNeighbour, nNeighbourSK, iSquare, iSparseStart,& & img2CentCell, this%Eiginv, this%EiginvAdj, this%energy, this%ErhoPrim, this%qBlock,& & this%qNetAtom, allocated(dftbU), onSiteElements, eigvecsCplx, this%HSqrCplxCam,& @@ -4086,8 +4574,8 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe call initLatticeVectors(this, boundaryCond) end if - call initTDOutput(this, this%dipoleDat, this%qDat, this%energyDat,& - & this%populDat, this%forceDat, this%coorDat, this%atomEnergyDat) + call initTDOutput(this, env, this%dipoleDat, this%qDat, this%energyDat, this%populDat,& + & this%forceDat, this%coorDat, this%atomEnergyDat) ! Write density at t=0 if (this%tPump .and. .not. 
this%tReadRestart) then @@ -4125,7 +4613,8 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe call getChargeDipole(this, this%deltaQ, this%qq, this%multipole, this%dipole, q0,& & this%trho, this%Ssqr, this%Dsqr, this%Qsqr, coord, iSquare, eFieldScaling, this%qBlock,& - & this%qNetAtom, errStatus) + & this%qNetAtom, errStatus, neighbourList%iNeighbour, nNeighbourSK, orb, iSparseStart, & + & img2CentCell, env, ints) @:PROPAGATE_ERROR(errStatus) if (allocated(this%dispersion)) then call this%dispersion%updateOnsiteCharges(this%qNetAtom, orb, referenceN0,& @@ -4163,8 +4652,9 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe end if ! Apply kick to rho if necessary (in restart case, check it starttime is 0 or not) + ! TODO: Kick for MPI if (this%tKick .and. this%startTime < this%dt / 10.0_dp) then - call kickDM(this, this%trho, this%Ssqr, this%Sinv, iSquare, coord) + call kickDM(this, env, this%trho, this%Ssqr, this%Sinv, iSquare, coord, orb) end if call getPositionDependentEnergy(this, env, this%energy, coordAll, img2CentCell, neighbourList,& @@ -4179,7 +4669,7 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe if (.not. this%tReadRestart .or. this%tProbe) then ! 
output ground state data - call writeTDOutputs(this, this%dipoleDat, this%qDat, this%energyDat, & + call writeTDOutputs(this, env, this%dipoleDat, this%qDat, this%energyDat, & & this%forceDat, this%coorDat, this%fdBondPopul, this%fdBondEnergy, this%atomEnergyDat,& & 0.0_dp, this%energy, this%energyKin, this%dipole, this%deltaQ, coord, this%totalForce,& & 0) @@ -4208,13 +4698,14 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe @:PROPAGATE_ERROR(errStatus) call updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, nNeighbourSK,& & iSparseStart, img2CentCell, iSquare, coordAll, this%Sinv, this%Ssqr, this%ham0,& - & Dsqr=this%Dsqr, Qsqr=this%Qsqr) + & errStatus, Dsqr=this%Dsqr, Qsqr=this%Qsqr) @:PROPAGATE_ERROR(errStatus) end if call getChargeDipole(this, this%deltaQ, this%qq, this%multipole, this%dipole, q0,& & this%rho, this%Ssqr, this%Dsqr, this%Qsqr, coord, iSquare, eFieldScaling, this%qBlock,& - & this%qNetAtom, errStatus) + & this%qNetAtom, errStatus, neighbourList%iNeighbour, nNeighbourSK, orb, iSparseStart,& + & img2CentCell, env, ints) @:PROPAGATE_ERROR(errStatus) if (allocated(this%dispersion)) then call this%dispersion%updateOnsiteCharges(this%qNetAtom, orb, referenceN0,& @@ -4231,7 +4722,7 @@ subroutine initializeDynamics(this, boundaryCond, coord, orb, neighbourList, nNe if (this%tForces) then call getForces(this, this%movedAccel, this%totalForce, this%rho, this%H1, this%Sinv,& & neighbourList, nNeighbourSK, symNeighbourList, nNeighbourCamSym, img2CentCell,& - & iSparseStart, iSquare, this%potential, orb, skHamCont, skOverCont, this%qq, q0,& + & iSparseStart, iSquare, this%potential, orb, skHamCont, skOverCont, this%qq, q0,& & repulsive, coordAll, this%rhoPrim, this%ErhoPrim, 0, env, hybridXc, this%deltaRho,& & this%Ssqr, errStatus) @:PROPAGATE_ERROR(errStatus) @@ -4359,7 +4850,7 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh type(TStatus), intent(inout) :: errStatus 
real(dp), allocatable :: velInternal(:,:) - real(dp) :: new3Coord(3, this%nMovedAtom) + real(dp) :: new3Coord(3, this%nMovedAtom), propStep character(sc) :: dumpIdx logical :: tProbeFrameWrite integer :: iKS @@ -4407,7 +4898,7 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh if ((mod(iStep, this%writeFreq) == 0)) then call getBondPopulAndEnergy(this, this%bondWork, this%lastBondPopul, this%rhoPrim, this%ham0,& & ints, neighbourList%iNeighbour, nNeighbourSK, iSparseStart, img2CentCell, iSquare,& - & this%fdBondEnergy, this%fdBondPopul, this%time) + & this%fdBondEnergy, this%fdBondPopul, this%time, env) end if do iKS = 1, this%parallelKS%nLocalKS @@ -4416,15 +4907,26 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh if (this%tEulers .and. (iStep > 0) .and. (mod(iStep, max(this%eulerFreq,1)) == 0)) then call zcopy(this%nOrbs*this%nOrbs, this%rho(:,:,iKS), 1, this%rhoOld(:,:,iKS), 1) - call propagateRho(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& - & this%H1(:,:,iKS), this%Sinv(:,:,iKS), this%dt) + propStep = this%dt else - call propagateRho(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& - & this%H1(:,:,iKS), this%Sinv(:,:,iKS), 2.0_dp * this%dt) + propStep = 2.0_dp * this%dt end if + #:if WITH_SCALAPACK + call propagateRhoBlacs(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& + & this%H1(:,:,iKS), this%Sinv(:,:,iKS), propStep) + #:else + call propagateRho(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& + & this%H1(:,:,iKS), this%Sinv(:,:,iKS), propStep) + #:endif else + #:if WITH_SCALAPACK + this%H1(:,:,iKS) = imag * this%H1(:,:,iKS) + call propagateRhoBlacs(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& + & this%H1(:,:,iKS), this%Sinv(:,:,iKS), 2.0_dp * this%dt) + #:else call propagateRhoRealH(this, this%rhoOld(:,:,iKS), this%rho(:,:,iKS),& & this%H1(:,:,iKS), this%Sinv(:,:,iKS), 2.0_dp * this%dt) + #:endif end if end do @@ -4442,16 +4944,16 @@ subroutine doTdStep(this, boundaryCond, iStep, 
coord, orb, neighbourList, nNeigh ! rest of the quantities but with the same time label. ! TODO: fix tests values for populations so that it becomes exactly syncronized with the ! other outputs - call getTDPopulations(this, this%occ, this%rho, this%Eiginv, this%EiginvAdj, this%populDat,& - & this%time-this%dt, iKS) + call getTDPopulations(this, env, this%occ, this%rho, this%Eiginv, this%EiginvAdj,& + & this%populDat, this%time-this%dt, iKS, neighbourList%iNeighbour, nNeighbourSK,& + & orb%mOrb, iSparseStart, img2CentCell, this%rhoPrim) end do end if if (.not. this%tReadRestart .or. (iStep > 0) .or. this%tProbe) then - call writeTDOutputs(this, this%dipoleDat, this%qDat, this%energyDat, & - & this%forceDat, this%coorDat, this%fdBondPopul, this%fdBondEnergy, this%atomEnergyDat,& - & this%time, this%energy, this%energyKin, this%dipole, this%deltaQ, coord,& - & this%totalForce, iStep) + call writeTDOutputs(this, env, this%dipoleDat, this%qDat, this%energyDat, this%forceDat,& + & this%coorDat, this%fdBondPopul, this%fdBondEnergy, this%atomEnergyDat, this%time,& + & this%energy, this%energyKin, this%dipole, this%deltaQ, coord, this%totalForce, iStep) end if if (this%tWriteRestart .and. iStep > 0 .and. mod(iStep, max(this%restartFreq,1)) == 0) then @@ -4461,8 +4963,13 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh else velInternal(:,:) = 0.0_dp end if + #:if WITH_SCALAPACK + call writeRestartFileBlacs(this%rho, this%rhoOld, coord, velInternal, this%time, this%dt,& + & restartFileName, env, this%denseDesc, this%parallelKS, errStatus) + #:else call writeRestartFile(this%rho, this%rhoOld, coord, velInternal, this%time, this%dt, & &restartFileName, this%tWriteRestartAscii, errStatus) + #:endif @:PROPAGATE_ERROR(errStatus) deallocate(velInternal) end if @@ -4472,7 +4979,7 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh & .and. 
(mod(iStep-this%PpIni, max(this%PpFreq,1)) == 0) if (tProbeFrameWrite) then write(dumpIdx,'(I0)')int((iStep-this%PpIni)/this%PpFreq) - allocate(velInternal(3,size(this%movedVelo, dim=2))) + allocate(velInternal(3, size(this%movedVelo, dim=2))) if (this%tIons) then call state(this%pMDIntegrator, velocities=velInternal) else @@ -4493,13 +5000,14 @@ subroutine doTdStep(this, boundaryCond, iStep, coord, orb, neighbourList, nNeigh @:PROPAGATE_ERROR(errStatus) call updateH0S(this, env, ints, orb, skHamCont, skOverCont, neighbourList, nNeighbourSK,& & iSparseStart, img2CentCell, iSquare, coordAll, this%Sinv, this%Ssqr, this%ham0,& - & Dsqr=this%Dsqr, Qsqr=this%Qsqr) + & errStatus, Dsqr=this%Dsqr, Qsqr=this%Qsqr) @:PROPAGATE_ERROR(errStatus) end if call getChargeDipole(this, this%deltaQ, this%qq, this%multipole, this%dipole, q0,& & this%rho, this%Ssqr, this%Dsqr, this%Qsqr, coord, iSquare, eFieldScaling, this%qBlock,& - & this%qNetAtom, errStatus) + & this%qNetAtom, errStatus, neighbourList%iNeighbour, nNeighbourSK, orb, iSparseStart,& + & img2CentCell, env, ints) @:PROPAGATE_ERROR(errStatus) if (allocated(this%dispersion)) then call this%dispersion%updateOnsiteCharges(this%qNetAtom, orb, referenceN0,& @@ -4651,16 +5159,21 @@ end subroutine handleCoordinateChange !> Handles deallocation, closing outputs and autotest writing - subroutine finalizeDynamics(this) + subroutine finalizeDynamics(this, env) !> ElecDynamics instance type(TElecDynamics), intent(inout) :: this - call closeTDOutputs(this) + !> Environment settings + type(TEnvironment), intent(in) :: env + + call closeTDOutputs(this, env) deallocate(this%Ssqr) deallocate(this%Sinv) deallocate(this%H1) + deallocate(this%trho) + deallocate(this%trhoOld) deallocate(this%RdotSprime) deallocate(this%qq) deallocate(this%deltaQ) @@ -4668,8 +5181,6 @@ subroutine finalizeDynamics(this) deallocate(this%chargePerShell) deallocate(this%occ) deallocate(this%totalForce) - deallocate(this%trho) - deallocate(this%trhoOld) if 
(allocated(this%Dsqr)) then deallocate(this%Dsqr) end if @@ -4709,4 +5220,78 @@ subroutine finalizeDynamics(this) end subroutine finalizeDynamics + +#:if WITH_SCALAPACK + !> gets diagonal elements of the distributed density matrix (occupations) + !! TODO: there is a more direct and faster way of getting these elements, + !! this can be improved in the future if it is a bottleneck. + subroutine unpackTDpopulBlacs(iNeighbour, nNeighbourSK, mOrb, iSparseStart, img2CentCell, T1R,& + & rhoPrim, env, occ, this, iKS, desc) + + !> Environment settings + type(TEnvironment), intent(in) :: env + + !> Neighbour list for the atoms (First index from 0!) + integer, intent(in) :: iNeighbour(0:, :) + + !> Nr. of neighbours for the atoms. + integer, intent(in) :: nNeighbourSK(:) + + !> Maximal number of orbitals on an atom. + integer, intent(in) :: mOrb + + !> indexing array for the sparse Hamiltonian + integer, intent(in) :: iSparseStart(0:, :) + + !> Mapping between image atoms and corresponding atom in the central cell. + integer, intent(in) :: img2CentCell(:) + + !> ElecDynamics instance + type(TElecDynamics), intent(in) :: this + + !> Sparse density matrix + real(dp), allocatable, intent(inout) :: rhoPrim(:,:) + + !> K-Spin mixed index + integer, intent(in) :: iKS + + !> Molecular orbital occupations + real(dp), intent(inout) :: occ(:) + + !> Dense matrix description + type(TDenseDescr), intent(in) :: desc + + real(dp), intent(in) :: T1R(:,:) + + real(dp), allocatable :: popSparse(:) + real(dp), allocatable :: arrayMO(:,:) + integer :: iNeigh, iSpin, iOrb1, iOrig, nOrb, iK, ii + integer :: rhoDim, iAtom, jj + + rhoDim = size(rhoPrim, dim=1) + allocate(popSparse(rhoDim)) + allocate(arrayMO(mOrb,mOrb)) + + popSparse(:) = 0.0_dp + ! 1. dense to sparse + call packRhoRealBlacs(env%blacs, this%denseDesc, T1R, iNeighbour, nNeighbourSK, mOrb,& + & iSparseStart, img2CentCell, popSparse) + + ! 2. 
collect all the elements of the sparse DM + call mpifx_allreduceip(env%mpi%globalComm, popSparse, MPI_SUM) + + ! 3. get the diagonal blocks and then the diagonal elements + do iAtom = 1, this%nAtom + arrayMO(:,:) = 0.0_dp + ii = desc%iAtomStart(iAtom) + nOrb = desc%iAtomStart(iAtom+1) - ii + iNeigh = 0 + iOrig = iSparseStart(iNeigh, iAtom) + 1 + ! Diagonal = every (nOrb + 1)th element in the flat form of an nOrb1 x nOrb1 matrix + occ(ii : ii + nOrb - 1) = popSparse(iOrig : iOrig + nOrb * nOrb - 1 : nOrb + 1) + end do + + end subroutine unpackTDpopulBlacs +#:endif + end module dftbp_timedep_timeprop diff --git a/test/app/dftb+/tests b/test/app/dftb+/tests index 12407c53a4..2e161a0600 100644 --- a/test/app/dftb+/tests +++ b/test/app/dftb+/tests @@ -22,32 +22,31 @@ timeprop/C4H6_rs #? OMP_THREADS <= 4 and not WITH_MPI timeprop/C4H6_rs_Singlet #? OMP_THREADS <= 4 and not WITH_MPI timeprop/benzene_Ehrenfest_restart #? OMP_THREADS <= 4 and not WITH_MPI timeprop/benzene_Ehrenfest_restartAscii #? OMP_THREADS <= 4 and not WITH_MPI -timeprop/benzene_kick_bndpop #? OMP_THREADS == 1 and not WITH_MPI +timeprop/benzene_kick_bndpop #? OMP_THREADS == 1 and MPI_PROCS <=2 timeprop/benzene_kick_gfn1 #? OMP_THREADS <= 4 and not WITH_MPI and WITH_TBLITE -timeprop/benzene_laser #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/benzene_laser #? OMP_THREADS <= 4 and MPI_PROCS <=2 timeprop/benzene_laser_gfn2 #? OMP_THREADS <= 4 and not WITH_MPI and WITH_TBLITE -timeprop/benzene_sin2 #? OMP_THREADS <= 4 and not WITH_MPI -timeprop/benzene_gaussian #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/benzene_sin2 #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/benzene_gaussian #? OMP_THREADS <= 4 and MPI_PROCS <=2 timeprop/GNR_kpoint_kick #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST timeprop/GNR_kpoint_restart #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST -timeprop/H2_singlet #? OMP_THREADS <= 1 and not WITH_MPI -timeprop/H2_triplet #? 
OMP_THREADS <= 1 and not WITH_MPI -timeprop/H2_exc_atoms #? OMP_THREADS <= 1 and not WITH_MPI -timeprop/C60_kick #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST -timeprop/C60_triplet #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST +timeprop/H2_singlet #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/H2_triplet #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/H2_exc_atoms #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/C60_kick #? OMP_THREADS <= 4 and MPI_PROCS <=4 and LONG_TEST +timeprop/C60_triplet #? OMP_THREADS <= 4 and MPI_PROCS <=4 and LONG_TEST timeprop/NO_pSIC #? OMP_THREADS == 1 and not WITH_MPI -timeprop/NO_onsite #? OMP_THREADS == 1 and not WITH_MPI -timeprop/benzene_kick_periodic #? OMP_THREADS <= 1 and not WITH_MPI -timeprop/GNR_periodic_kick #? OMP_THREADS == 1 and not WITH_MPI -timeprop/CH3_dftbu_singlet #? OMP_THREADS == 1 and not WITH_MPI -timeprop/GNR_periodic_laser #? OMP_THREADS == 1 and not WITH_MPI -timeprop/benzene_ions_pulse #? OMP_THREADS == 1 and not WITH_MPI -timeprop/GNR_periodic_ions #? OMP_THREADS == 1 and not WITH_MPI and LONG_TEST -timeprop/graphite_dispersion_pulse #? OMP_THREADS == 1 and not WITH_MPI and LONG_TEST -timeprop/PDI_charged #? OMP_THREADS == 1 and not WITH_MPI -timeprop/benzene_circpol #? OMP_THREADS == 1 and not WITH_MPI - -timeprop/benzene_rs #? OMP_THREADS <= 1 and not WITH_MPI +timeprop/NO_onsite #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/benzene_kick_periodic #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/GNR_periodic_kick #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/CH3_dftbu_singlet #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/GNR_periodic_laser #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/benzene_ions_pulse #? OMP_THREADS <= 4 and not WITH_MPI +timeprop/GNR_periodic_ions #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST +timeprop/graphite_dispersion_pulse #? OMP_THREADS <= 4 and not WITH_MPI and LONG_TEST +timeprop/PDI_charged #? OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/benzene_circpol #? 
OMP_THREADS <= 4 and MPI_PROCS <=2 +timeprop/benzene_rs #? OMP_THREADS <= 4 and not WITH_MPI helical/10-0Ctube #? MPI_PROCS <= 2 helical/10-0CtubeC_10 #? MPI_PROCS == 1 diff --git a/test/app/dftb+/timeprop/C4H6_rs/dftb_in.hsd b/test/app/dftb+/timeprop/C4H6_rs/dftb_in.hsd index e4f225fbd9..946a8beabe 100644 --- a/test/app/dftb+/timeprop/C4H6_rs/dftb_in.hsd +++ b/test/app/dftb+/timeprop/C4H6_rs/dftb_in.hsd @@ -44,5 +44,11 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 16 + } + } diff --git a/test/app/dftb+/timeprop/C4H6_rs_Singlet/dftb_in.hsd b/test/app/dftb+/timeprop/C4H6_rs_Singlet/dftb_in.hsd index 4357975e22..3ad420f53a 100644 --- a/test/app/dftb+/timeprop/C4H6_rs_Singlet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/C4H6_rs_Singlet/dftb_in.hsd @@ -58,5 +58,10 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 16 + } } diff --git a/test/app/dftb+/timeprop/C4H6_rs_Triplet/dftb_in.hsd b/test/app/dftb+/timeprop/C4H6_rs_Triplet/dftb_in.hsd index 88bcbd1666..a2d4f0355d 100644 --- a/test/app/dftb+/timeprop/C4H6_rs_Triplet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/C4H6_rs_Triplet/dftb_in.hsd @@ -57,5 +57,9 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) 
UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + Blacs = BlockSize { + 16 + } } diff --git a/test/app/dftb+/timeprop/C60_kick/dftb_in.hsd b/test/app/dftb+/timeprop/C60_kick/dftb_in.hsd index 121c203c3a..ff42579cff 100644 --- a/test/app/dftb+/timeprop/C60_kick/dftb_in.hsd +++ b/test/app/dftb+/timeprop/C60_kick/dftb_in.hsd @@ -97,3 +97,7 @@ ElectronDynamics = { } FieldStrength [v/a] = 0.001 } + +Parallel = { +UseOmpThreads = Yes +} diff --git a/test/app/dftb+/timeprop/C60_triplet/dftb_in.hsd b/test/app/dftb+/timeprop/C60_triplet/dftb_in.hsd index c5f272092e..ed1b45c787 100644 --- a/test/app/dftb+/timeprop/C60_triplet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/C60_triplet/dftb_in.hsd @@ -104,3 +104,9 @@ ElectronDynamics = { } FieldStrength [v/a] = 0.001 } + +Parallel { + # Allow OMP threads explicitely to test for hybrid parallelisation with + # MPI-binary. (Check the manual before using this in production runs!) + UseOmpThreads = Yes +} diff --git a/test/app/dftb+/timeprop/CH3_dftbu_singlet/dftb_in.hsd b/test/app/dftb+/timeprop/CH3_dftbu_singlet/dftb_in.hsd index 08243c7db4..13ab5b81e5 100644 --- a/test/app/dftb+/timeprop/CH3_dftbu_singlet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/CH3_dftbu_singlet/dftb_in.hsd @@ -74,4 +74,8 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + Blacs = BlockSize { + # Do not use this setting in production runs + 1 + } } diff --git a/test/app/dftb+/timeprop/GNR_periodic_kick/dftb_in.hsd b/test/app/dftb+/timeprop/GNR_periodic_kick/dftb_in.hsd index 8cd2505860..c8ecd2c310 100644 --- a/test/app/dftb+/timeprop/GNR_periodic_kick/dftb_in.hsd +++ b/test/app/dftb+/timeprop/GNR_periodic_kick/dftb_in.hsd @@ -72,6 +72,7 @@ InputVersion = 20.1 # MPI-binary. (Check the manual before using this in production runs!) 
UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/GNR_periodic_laser/dftb_in.hsd b/test/app/dftb+/timeprop/GNR_periodic_laser/dftb_in.hsd index 9bbef90252..f9de04c88d 100644 --- a/test/app/dftb+/timeprop/GNR_periodic_laser/dftb_in.hsd +++ b/test/app/dftb+/timeprop/GNR_periodic_laser/dftb_in.hsd @@ -73,6 +73,7 @@ InputVersion = 20.1 # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/H2_exc_atoms/dftb_in.hsd b/test/app/dftb+/timeprop/H2_exc_atoms/dftb_in.hsd index 5571a325ae..bfa3e875d6 100644 --- a/test/app/dftb+/timeprop/H2_exc_atoms/dftb_in.hsd +++ b/test/app/dftb+/timeprop/H2_exc_atoms/dftb_in.hsd @@ -48,4 +48,9 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 1 + } } diff --git a/test/app/dftb+/timeprop/H2_singlet/dftb_in.hsd b/test/app/dftb+/timeprop/H2_singlet/dftb_in.hsd index e9ed446c11..2372b95997 100644 --- a/test/app/dftb+/timeprop/H2_singlet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/H2_singlet/dftb_in.hsd @@ -52,4 +52,9 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) 
UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 1 + } } diff --git a/test/app/dftb+/timeprop/H2_triplet/dftb_in.hsd b/test/app/dftb+/timeprop/H2_triplet/dftb_in.hsd index 4968bb79f1..cb9c7b3287 100644 --- a/test/app/dftb+/timeprop/H2_triplet/dftb_in.hsd +++ b/test/app/dftb+/timeprop/H2_triplet/dftb_in.hsd @@ -52,4 +52,10 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 1 + } + } diff --git a/test/app/dftb+/timeprop/NO_onsite/dftb_in.hsd b/test/app/dftb+/timeprop/NO_onsite/dftb_in.hsd index 4594cb865c..0968dafc0d 100644 --- a/test/app/dftb+/timeprop/NO_onsite/dftb_in.hsd +++ b/test/app/dftb+/timeprop/NO_onsite/dftb_in.hsd @@ -73,4 +73,9 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + Blacs = BlockSize { + 1 + } + } diff --git a/test/app/dftb+/timeprop/NO_pSIC/dftb_in.hsd b/test/app/dftb+/timeprop/NO_pSIC/dftb_in.hsd index f713b85bb7..d0f31dfdc8 100644 --- a/test/app/dftb+/timeprop/NO_pSIC/dftb_in.hsd +++ b/test/app/dftb+/timeprop/NO_pSIC/dftb_in.hsd @@ -74,4 +74,8 @@ Parallel { # Allow OMP threads explicitely to test for hybrid parallelisation with # MPI-binary. (Check the manual before using this in production runs!) 
UseOmpThreads = Yes + Blacs = BlockSize { + # Do not use this setting in production runs + 1 + } } diff --git a/test/app/dftb+/timeprop/PDI_charged/dftb_in.hsd b/test/app/dftb+/timeprop/PDI_charged/dftb_in.hsd index 03df66f744..4116339d2e 100644 --- a/test/app/dftb+/timeprop/PDI_charged/dftb_in.hsd +++ b/test/app/dftb+/timeprop/PDI_charged/dftb_in.hsd @@ -66,6 +66,7 @@ Prefix = {slakos/origin/mio-1-1/} WriteAutotestTag = Yes } InputVersion = 20.1 + ElectronDynamics = { Steps = 2000 TimeStep [au] = 0.2 @@ -74,3 +75,7 @@ InputVersion = 20.1 PolarizationDirection = "x" } } + + Parallel = { + UseOmpThreads = Yes + } diff --git a/test/app/dftb+/timeprop/benzene_circpol/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_circpol/dftb_in.hsd index 211105654e..c74ea44766 100644 --- a/test/app/dftb+/timeprop/benzene_circpol/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_circpol/dftb_in.hsd @@ -60,6 +60,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/benzene_gaussian/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_gaussian/dftb_in.hsd index 2761889165..d42f74b1ff 100644 --- a/test/app/dftb+/timeprop/benzene_gaussian/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_gaussian/dftb_in.hsd @@ -59,6 +59,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) 
UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/benzene_kick_bndpop/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_kick_bndpop/dftb_in.hsd index d04ec9e70c..4266a54fbf 100644 --- a/test/app/dftb+/timeprop/benzene_kick_bndpop/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_kick_bndpop/dftb_in.hsd @@ -56,6 +56,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/benzene_laser/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_laser/dftb_in.hsd index 5a55ea660d..8c8caebba2 100644 --- a/test/app/dftb+/timeprop/benzene_laser/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_laser/dftb_in.hsd @@ -56,6 +56,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/benzene_laser_gfn2/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_laser_gfn2/dftb_in.hsd index 7c50a3737d..6a6a7e932a 100644 --- a/test/app/dftb+/timeprop/benzene_laser_gfn2/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_laser_gfn2/dftb_in.hsd @@ -36,4 +36,14 @@ ElectronDynamics = { Options = { WriteAutotestTag = Yes } ParserOptions { ParserVersion = 9 } -Parallel { UseOmpThreads = Yes } + +Parallel { + # Allow OMP threads explicitely to test for hybrid parallelisation with + # MPI-binary. (Check the manual before using this in production runs!) 
+ UseOmpThreads = Yes + # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs + Blacs = BlockSize { + 16 + } +} diff --git a/test/app/dftb+/timeprop/benzene_rs/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_rs/dftb_in.hsd index 4d3556865e..933a48c536 100644 --- a/test/app/dftb+/timeprop/benzene_rs/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_rs/dftb_in.hsd @@ -55,6 +55,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } diff --git a/test/app/dftb+/timeprop/benzene_sin2/dftb_in.hsd b/test/app/dftb+/timeprop/benzene_sin2/dftb_in.hsd index 40430cea7c..dc3d4b9ff3 100644 --- a/test/app/dftb+/timeprop/benzene_sin2/dftb_in.hsd +++ b/test/app/dftb+/timeprop/benzene_sin2/dftb_in.hsd @@ -58,6 +58,7 @@ Parallel { # MPI-binary. (Check the manual before using this in production runs!) UseOmpThreads = Yes # Due to small size of problem, allow 2 MPI processors by shrinking blocks + # Do not use this setting in production runs Blacs = BlockSize { 16 } From b627a6de5a2dcaed2f5c348e4a4b2316dba8d107 Mon Sep 17 00:00:00 2001 From: Ben Hourahine Date: Thu, 5 Feb 2026 14:22:07 +0000 Subject: [PATCH 2/2] Correct comment in API include file --- src/dftbp/api/mm/dftbplus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dftbp/api/mm/dftbplus.h b/src/dftbp/api/mm/dftbplus.h index 6b9b5eb32f..e0e4ab6758 100644 --- a/src/dftbp/api/mm/dftbplus.h +++ b/src/dftbp/api/mm/dftbplus.h @@ -351,7 +351,7 @@ void dftbp_set_coords_lattice_origin(DftbPlus *instance, const double *coords, * \param[in] cutoff Cutoff used to compute the neighbour list Unit: Bohr. * * \param[in] coordNeighbours Coordinates of all neighbour atom in all cells - * Shape: [3, nAllAtom]. Unit: Bohr. + * Shape: [nAllAtom, 3]. Unit: Bohr. 
* * \param[in] neighbour2CentCell Index of the atom in the central cell a neighbour atom corresponds * to Shape: [nAllAtom].