From 5bc1f51635bf2983343a205195dd86fe39145f02 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 4 Feb 2019 16:22:12 +0000 Subject: [PATCH 01/79] More gfortran fixes --- diag.f90 | 79 ++++++++++++++++++++++------ fields.f90 | 36 ++++++------- lapack.f90 | 133 +++++++++++++++++++++++++++++------------------ makefile | 20 +++---- perturbation.f90 | 6 +-- plasma.f90 | 31 +++++++---- pot_xy2.f90 | 2 +- trove.f90 | 9 ++-- 8 files changed, 205 insertions(+), 111 deletions(-) diff --git a/diag.f90 b/diag.f90 index b68c6e4..fadcbf4 100644 --- a/diag.f90 +++ b/diag.f90 @@ -2090,18 +2090,18 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) ! !dec$ if (arpack_ > 0) - #if arpack_ > 0 +#if arpack_ > 0 ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) - #else +#else !dec$ else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - #endif +#endif !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then @@ -2139,6 +2139,7 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) rvec = .true. ! !dec$ if (arpack_ > 0) +#if arpack_ > 0 ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2153,11 +2154,13 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! !dec$ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! !dec$ end if +#endif ! if ( ierr < 0 ) then write(out,"(/'Error with_seupd, info = ',i8)") ierr @@ -2444,15 +2447,19 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) nprow = 1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) myid= myprow +#endif !dec$ elseif (mpi_ > 0) +#ifdef mpi_ call MPI_COMM_RANK( comm, myid, ierr ) call MPI_COMM_SIZE( comm, nprocs_, ierr ) if (nprocs_/=nprocs) then write(out,"('matvec_p: inconsistent number of nprocs = ',2i6)") nprocs_,nprocs stop 'matvec_p: inconsistent number of nprocs s' endif +#endif !dec$ end if ! kend = kstart(myid) + nloc-1 @@ -2484,10 +2491,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) nx = iend-istart+1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, iprev, mypcol ) +#endif !dec$ end if !dec$ if (mpi_ > 0) +#ifdef mpi_ call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,myid,comm,ierr) +#endif !dec$ end if ! istart = max(istart,bterm(k,1)) @@ -2503,10 +2514,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) nx = iend-istart+1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call dgesd2d( comm, nx, 1, z(istart:iend), nx, iprev, mypcol ) !dec$ end if !dec$ if (mpi_ > 0) +#endif +#ifdef mpi_ call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,iprev,comm,ierr) +#endif !dec$ end if ! enddo @@ -2525,10 +2540,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) nx = iend-istart+1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, inext, mypcol ) +#endif !dec$ end if !dec$ if (mpi_ > 0) +#ifdef mpi_ call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,myid,comm,ierr) +#endif !dec$ end if ! iend = min(bterm(k,2),iend) @@ -2544,10 +2563,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) nx = iend-istart+1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call dgesd2d( comm, nx, 1, z(istart:iend), nx, inext, mypcol ) +#endif !dec$ end if !dec$ if (mpi_ > 0) +#ifdef mpi_ call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,inext,comm,ierr) +#endif !dec$ end if ! enddo @@ -2679,10 +2702,12 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) !dec$ if (blacs_ > 0) +#ifdef blacs_ write(out,"('BLAS-PINFO-start')") call BLACS_PINFO( iam, nprocs ) print *,nprocs blacs_or_mpi = 'BLACS' +#endif !dec$ end if ! write(out,"('BLAS-PINFO-done')") @@ -2691,19 +2716,19 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) !print *,nprocs !dec$ if (mpi_ > 0) - call MPI_INIT( ierr ) - comm = MPI_COMM_WORLD - call MPI_COMM_RANK( comm, myid, ierr ) - call MPI_COMM_SIZE( comm, nprocs, ierr ) - ! - print *,comm,myid,nprocs - ! - if (trim(blacs_or_mpi)=='BLACS') then - write(out,"('diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") - stop 'diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' - endif - ! - blacs_or_mpi = 'MPI' + !!call MPI_INIT( ierr ) + !!comm = MPI_COMM_WORLD + !!call MPI_COMM_RANK( comm, myid, ierr ) + !!call MPI_COMM_SIZE( comm, nprocs, ierr ) + !!! + !!print *,comm,myid,nprocs + !!! + !!if (trim(blacs_or_mpi)=='BLACS') then + !! write(out,"('diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") + !! stop 'diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' + !!endif + !!! + !!blacs_or_mpi = 'MPI' ! !dec$ end if ! @@ -2715,7 +2740,9 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) if (nprocs .lt. 1) then nprocs = 1 !dec$ if (blacs_ > 0) +#ifdef blacs_ call BLACS_SETUP( iam, nprocs ) +#endif !dec$ end if ! print *,nprocs @@ -2806,12 +2833,14 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) myprow = 1 ; mypcol = 1 ; myid = 1 ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call BLACS_GET( 0, 0, comm ) call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) ! write(out,"(' myprow, nprow, mypcol, npcol = ',4i8)") myprow, nprow, mypcol, npcol ! +#endif !dec$ end if ! if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') then @@ -2882,17 +2911,20 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! !dec$ if (blacs_ > 0.or.mpi_ > 0) +#ifdef blacs_ ! call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! !dec$ else +#else ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! +#endif !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then @@ -2928,6 +2960,7 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) rvec = .true. ! !dec$ if (blacs_ > 0) +#ifdef blacs_ ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2939,11 +2972,13 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! +#else !dec$ else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! +#endif !dec$ end if ! if ( ierr < 0 ) then @@ -2993,11 +3028,15 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) 9000 continue ! !dec$ if (blacs_ > 0) +#ifdef blacs_ call BLACS_GRIDEXIT ( comm ) call BLACS_EXIT(0) +#endif !dec$ end if !dec$ if (mpi_ > 0) +#ifdef mpi_ call MPI_FINALIZE(rc) +#endif !dec$ end if @@ -3146,15 +3185,18 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! !dec$ if (omparpack_ > 0) +#ifdef omparpack_ ! call dsaupd_ ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) !dec$ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! +#endif !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then @@ -3192,6 +3234,7 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) rvec = .true. ! !dec$ if (omparpack_ > 0) +#ifdef omparpack_ ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -3206,10 +3249,12 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! !dec$ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! +#endif !dec$ end if ! if ( ierr < 0 ) then @@ -3304,6 +3349,7 @@ subroutine diag_propack(n,bterm,nroots,factor,maxiter,iverbose,tol,h,e) ! ! !dec$ if (propack_ < 1) +#ifndef propack_ ! write(out,'("PROPACK is not activated!")') stop 'PROPACK is not activated!' @@ -3311,6 +3357,7 @@ subroutine diag_propack(n,bterm,nroots,factor,maxiter,iverbose,tol,h,e) nroots = 0 return ! +#endif !dec$ endif ! nev = nroots diff --git a/fields.f90 b/fields.f90 index aadf0c0..2e94967 100644 --- a/fields.f90 +++ b/fields.f90 @@ -1781,6 +1781,7 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) select case (trim(job%bset(imode)%type)) ! case ('NUMEROV','BOX','LAGUERRE','FOURIER','LEGENDRE') + case default ! job%bset(imode)%coord_kinet = job%bset(imode)%type job%bset(imode)%coord_poten = job%bset(imode)%type @@ -1934,16 +1935,15 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) ! do i=1,Nmodes ! - if (job%bset(i)%type ==job%bset(i-1)%type .and.job%bset(i)%dim ==job%bset(i-1)%dim.and.& - job%bset(i)%coord_kinet==job%bset(i-1)%coord_kinet.and.job%bset(i)%coord_poten==job%bset(i-1)%coord_poten.and.& - job%bset(i)%class ==job%bset(i-1)%class .and.job%bset(i)%dvrpoints ==job%bset(i-1)%dvrpoints.and.& - job%bset(i)%range(1) ==job%bset(i-1)%range(1) .and.job%bset(i)%range(2) ==job%bset(i-1)%range(2).and.& - job%bset(i)%borders(1) ==job%bset(i-1)%borders(1) .and.job%bset(i)%borders(2) ==job%bset(i-1)%borders(2).and.& - job%bset(i)%res_coeffs ==job%bset(i-1)%res_coeffs .and.job%bset(i)%npoints ==job%bset(i-1)%npoints .and.& - job%bset(i)%periodic.eqv.job%bset(i-1)%periodic .and.job%bset(i)%iperiod ==job%bset(i-1)%iperiod ) then + if ((job%bset(i)%type ==job%bset(i-1)%type ).and.(job%bset(i)%dim ==job%bset(i-1)%dim ).and.& + (job%bset(i)%coord_kinet==job%bset(i-1)%coord_kinet).and.(job%bset(i)%coord_poten==job%bset(i-1)%coord_poten).and.& + (job%bset(i)%class ==job%bset(i-1)%class ).and.(job%bset(i)%dvrpoints ==job%bset(i-1)%dvrpoints ).and.& + (job%bset(i)%range(1) ==job%bset(i-1)%range(1) ).and.(job%bset(i)%range(2) ==job%bset(i-1)%range(2) ).and.& + (job%bset(i)%borders(1) ==job%bset(i-1)%borders(1) ).and.(job%bset(i)%borders(2) ==job%bset(i-1)%borders(2) ).and.& + (job%bset(i)%res_coeffs ==job%bset(i-1)%res_coeffs ).and.(job%bset(i)%npoints ==job%bset(i-1)%npoints ).and.& + (job%bset(i)%periodic.eqv.job%bset(i-1)%periodic ).and.(job%bset(i)%iperiod ==job%bset(i-1)%iperiod )) then ! job%bset(i)%species = ispecies - ! else ispecies = ispecies + 1 job%bset(i)%species = ispecies @@ -4026,7 +4026,7 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) ! enddo ! - call readi(i_t); extF%ifit(iterm,imu) = i_t + call readf(f_t); extF%ifit(iterm,imu) = int(f_t) call readf(f_t); extF%coef(iterm,imu) = f_t ! write(my_fmt,'(a,i0,a)') "(a,",Ncoords,"i1)" @@ -4160,7 +4160,7 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) ! to work only with all modes as one class in the contr. vibrational representaion, i.e. ! the vibr. Hamiltonian is assumed to be diagonal in this representaion: ! - if ( any( (/trim(job%IOj0ext_action),trim(job%IOj0matel_action)/) /='NONE' ) ) then + if ( trim(job%IOj0ext_action) /= 'NONE' .or. trim(job%IOj0matel_action) /='NONE' ) then ! job%vib_contract = .true. ! @@ -11506,7 +11506,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) ! Only pseudopotential function (part 1) is obtained as a vector product of two s_rot polynoms: ! V_pseudi1(q1,q2) = 1/4*factor*sum_{n1} [s_rot x s_rot] /mass(n1) ! - !$omp sections private(s_1t,s_2t,s_3t,s_4t,s_5t,r_t,kindex) + ! $omp sections private(s_1t,s_2t,s_3t,s_4t,s_5t,r_t,kindex) ! ! Allocating two temporaly arrays s_1t, s2t and s_3t ! @@ -11517,7 +11517,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) stop 'gmat_polynom: s_t - out of memory' endif ! - !$omp section + ! $omp section ! ! Vibrational part ! @@ -11538,7 +11538,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) enddo enddo ! - !$omp section + ! $omp section ! if (job%verbose>=4) write(out,"('g_vib... Done!')") ! @@ -11591,7 +11591,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) ! endif ! - !$omp section + ! $omp section ! ! Pseudopotential function: part 1 ! @@ -11620,7 +11620,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) ! if (job%verbose>=4) write(out,"('pseudo1... Done!')") ! - !$omp section + ! $omp section ! !U2:=simplify( ! 1/4*sum(sum(add(add(add( @@ -11669,7 +11669,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) ! if (job%verbose>=4) write(out,"('pseudo2... Done!')") ! - !$omp section + ! $omp section ! !U3:=simplify(sum(sum(sum(add( !> 1/4*1/mm[N0]*( S[q01][N0,x0]*DDS[q02][N0,x0][q02,q01]+1/2*DS[q01][N0,x0][q01]*DS[q02][N0,x0][q02] ) @@ -11726,7 +11726,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) enddo enddo ! - !$omp section + ! $omp section ! if (job%verbose>=4) write(out,"('pseudo3a... Done!')") ! @@ -11829,7 +11829,7 @@ subroutine gmat_polynom(s_vib,s_rot,g_vib,g_rot,g_cor,pseudo) deallocate(r_t) ! deallocate(s_1t,s_2t,s_3t,s_4t,s_5t) - !$omp end sections + ! $omp end sections ! ! if (trim(molec%coords_transform)=='R-RHO'.and..false.) then diff --git a/lapack.f90 b/lapack.f90 index 8b13a95..46ba975 100644 --- a/lapack.f90 +++ b/lapack.f90 @@ -1,9 +1,9 @@ module lapack -!dec$ define arpack_ = 0 -!dec$ define blacs_ = 0 -!dec$ define mpi_ = 0 -!dec$ define omparpack_ = 0 +!!! dec $ define arpack_ = 0 +!!! dec $ define blacs_ = 0 +!!! dec $ define mpi_ = 0 +!!! dec $ define omparpack_ = 0 ! ! Simplistic type-agnostic LAPACK interface @@ -1301,18 +1301,21 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) nev2 = nev rnorm = 1e-5 ! - !dec$ if (arpack_ > 0) + !!! dec $ if (arpack_ > 0) +#ifdef arpack_ ! call dsaupd( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info) !,np, rnorm, nconv, nev2 ) ! - !dec$ else +#else + !!! dec $ else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !dec$ end if +#endif + !!! dec $ end if ! ido = -1 ! @@ -1330,17 +1333,20 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! has been exceeded. ! - !dec$ if (arpack_ > 0) + !!! dec $ if (arpack_ > 0) +#ifdef arpack_ ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info) !,np, rnorm, nconv, nev2) - !dec$ else + !!! dec $ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !dec$ end if + !!! dec $ end if +#endif ! !if (verbose>=4.and.iparam(5)>0) then ! ! @@ -1401,7 +1407,8 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! rvec = .true. ! - !dec$ if (arpack_ > 0) + !!! dec $ if (arpack_ > 0) +#ifdef arpack_ ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -1415,12 +1422,14 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !dec$ else + !!! dec $ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !dec$ end if + !!! dec $ end if +#endif ! if ( ierr < 0 ) then write(out,"(/'Error with_seupd, info = ',i8)") ierr @@ -1539,17 +1548,19 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) myid = 1 nprow = 1 ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) myid= myprow - !dec$ elseif (mpi_ > 0) + !!! dec $ elseif (mpi_ > 0) call MPI_COMM_RANK( comm, myid, ierr ) call MPI_COMM_SIZE( comm, nprocs_, ierr ) if (nprocs_/=nprocs) then write(out,"('matvec_p: inconsistent number of nprocs = ',2i8)") nprocs_,nprocs stop 'matvec_p: inconsistent number of nprocs s' endif - !dec$ end if + !!! dec $ end if +#endif ! kend = kstart(myid) + nloc-1 ! @@ -1579,12 +1590,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, iprev, mypcol ) - !dec$ end if - !dec$ if (mpi_ > 0) + !!! dec $ end if + !!! dec $ if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,myid,comm,ierr) - !dec$ end if + !!! dec $ end if +#endif ! istart = max(istart,bterm(k,1)) ! @@ -1598,12 +1611,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call dgesd2d( comm, nx, 1, z(istart:iend), nx, iprev, mypcol ) - !dec$ end if - !dec$ if (mpi_ > 0) + !!! dec $ end if + !!! dec $ if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,iprev,comm,ierr) - !dec$ end if + !!! dec $ end if +#endif ! enddo ! @@ -1620,12 +1635,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, inext, mypcol ) - !dec$ end if - !dec$ if (mpi_ > 0) + !!! dec $ end if + !!! dec $ if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,myid,comm,ierr) - !dec$ end if + !!! dec $ end if +#endif ! iend = min(bterm(k,2),iend) ! @@ -1639,12 +1656,14 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call dgesd2d( comm, nx, 1, z(istart:iend), nx, inext, mypcol ) - !dec$ end if - !dec$ if (mpi_ > 0) + !!! dec $ end if + !!! dec $ if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,inext,comm,ierr) - !dec$ end if + !!! dec $ end if +#endif ! enddo ! @@ -1770,12 +1789,13 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! %-----------------------% ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call BLACS_PINFO( iam, nprocs ) blacs_or_mpi = 'BLACS' - !dec$ end if + !!! dec $ end if - !dec$ if (mpi_ > 0) + !!! dec $ if (mpi_ > 0) call MPI_INIT( ierr ) comm = MPI_COMM_WORLD call MPI_COMM_RANK( comm, myid, ierr ) @@ -1790,7 +1810,8 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! blacs_or_mpi = 'MPI' ! - !dec$ end if + !!! dec $ end if +#endif ! @@ -1799,9 +1820,11 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! if (nprocs .lt. 1) then nprocs = 1 - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call BLACS_SETUP( iam, nprocs ) - !dec$ end if + !!! dec $ end if +#endif endif if (nprocs >maxnprocs) stop 'nprocs > maxnprocs' ! @@ -1886,11 +1909,13 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! ! myprow = 1 ; mypcol = 1 ; myid = 1 - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call BLACS_GET( 0, 0, comm ) call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) - !dec$ end if + !!! dec $ end if +#endif ! if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') write(out,"('myprow, nprow, mypcol, npcol, nprocs = ',5i8)") & myprow, nprow, mypcol, npcol, nprocs @@ -1957,19 +1982,22 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! has been exceeded. ! - !dec$ if (blacs_ > 0.or.mpi_ > 0) + !!! dec $ if (blacs_ > 0.or.mpi_ > 0) +#ifdef arpack_ ! call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! - !dec$ else + !!! dec $ else +#else ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! - !dec$ end if + !!! dec $ end if +#endif ! !if (verbose>=4.and.iparam(5)>0) then ! ! @@ -2003,7 +2031,8 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! rvec = .true. ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2015,12 +2044,14 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !dec$ else + !!! dec $ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !dec$ end if + !!! dec $ end if +#endif ! if ( ierr < 0 ) then write(out,"(/'Error with_seupd, info = ',i8)") ierr @@ -2068,13 +2099,15 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! 9000 continue ! - !dec$ if (blacs_ > 0) + !!! dec $ if (blacs_ > 0) +#ifdef arpack_ call BLACS_GRIDEXIT ( comm ) call BLACS_EXIT(0) - !dec$ end if - !dec$ if (mpi_ > 0) + !!! dec $ end if + !!! dec $ if (mpi_ > 0) call MPI_FINALIZE(rc) - !dec$ end if + !!! dec $ end if +#endif deallocate(v,workl,workd,d,resid,select,mv_buf) diff --git a/makefile b/makefile index fdd909e..31fda26 100644 --- a/makefile +++ b/makefile @@ -12,35 +12,35 @@ pot_user = pot_ch4 PLAT = _2205_i17 ###FOR = ifort -FOR = ifort -FFLAGS = -ip -openmp -O3 -static +FOR = mpif90 +FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a #LAPACK = -mkl -LAPACK = -mkl=parallel +LAPACK = -lopenblas LIB = $(LAPACK) %.o : %.f90 - $(FOR) -c $(FFLAGS) $< + $(FOR) -cpp -c $(FFLAGS) $< ############################################################################### -trove.x: trove.o accuracy.o perturbation.o fields.o symmetry.o molecules.o me_numer.o me_str.o me_bnd.o me_rot.o \ +trove.x: trove.o accuracy.o perturbation.o fields.o symmetry.o molecules.o me_numer.o me_str.o me_bnd.o me_rot.o \ lapack.o plasma.o moltype.o refinement.o dipole.o refinement.o tran.o diag.o timer.o input.o \ - mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o \ - pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o $(pot_user).o + mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ + pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o $(pot_user).o $(FOR) $(FFLAGS) -o j-trove$(PLAT).x $^ $(LIB) trove.o: accuracy.o fields.o perturbation.o symmetry.o timer.o moltype.o dipole.o refinement.o tran.o perturbation.o: accuracy.o molecules.o lapack.o fields.o timer.o symmetry.o diag.o plasma.o fields.o: accuracy.o molecules.o lapack.o me_str.o timer.o me_numer.o input.o me_rot.o moltype.o symmetry.o me_bnd.o symmetry.o: accuracy.o -molecules.o: accuracy.o moltype.o mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o \ - lapack.o pot_xy2.o pot_xy3.o mol_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o \ +molecules.o: accuracy.o moltype.o mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ + lapack.o pot_xy2.o pot_xy3.o mol_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o \ symmetry.o $(pot_user).o me_numer.o: accuracy.o molecules.o timer.o @@ -66,6 +66,7 @@ mol_zxy3.o: accuracy.o moltype.o mol_ch3oh.o: accuracy.o moltype.o pot_ch3oh.o mol_c2h4.o: accuracy.o moltype.o mol_c2h6.o: accuracy.o moltype.o +mol_c3h6.o: accuracy.o moltype.o mol_abcd.o: accuracy.o moltype.o pot_abcd.o pot_ch4.o: accuracy.o moltype.o @@ -78,6 +79,7 @@ pot_c2h6.o: accuracy.o moltype.o pot_ch3oh.o: accuracy.o moltype.o pot_c2h4.o: accuracy.o moltype.o pot_c2h6.o: accuracy.o moltype.o +pot_c3h6.o: accuracy.o moltype.o pot_abcd.o: accuracy.o moltype.o lapack.o clean: diff --git a/perturbation.f90 b/perturbation.f90 index eaca068..f211e54 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -753,7 +753,7 @@ subroutine PTactive_space_init(bs_t) ! if (job%verbose>=2) then write(my_fmt,'(a,i0,a)') "(a,",Nmodes,"i5)" - write(out) + !write(out) write(out,my_fmt) 'Polyads estimated from range: ',(imode,imode=1,Nmodes) write(out,my_fmt) ' -> ',(pol_t(imode),imode=1,Nmodes) write(out,"(/'Adjusted (input) maximal polyad is: ',i5,'(',i5,')')") PT%Polyad_max,PT%Npolyads @@ -1080,7 +1080,7 @@ subroutine PTactive_space_init(bs_t) ! if (job%verbose>=4) then write(my_fmt,'(a,i0,a)') "(a,",NPTorder1,"i8)" - write(out) + !write(out) write(out,my_fmt) 'RangeOrder vs order ',(iorder ,iorder=0,NPTorder) write(out,my_fmt) 'RangeOrder -> ',(PT%RangeOrder(iorder),iorder=0,NPTorder) endif @@ -1095,7 +1095,7 @@ subroutine PTactive_space_init(bs_t) ! if (job%verbose>=4) then write(my_fmt,'(a,i0,a)') "(a,",Npolyads1,"i8)" - write(out) + !write(out) write(out,my_fmt) 'MaxIndex_nu vs polyad ',(ipol,ipol=0,Npolyads) write(out,my_fmt) ' -> ',(PT%MaxIndex_nu(ipol),ipol=0,Npolyads) endif diff --git a/plasma.f90 b/plasma.f90 index d311830..a99af6a 100644 --- a/plasma.f90 +++ b/plasma.f90 @@ -1,6 +1,7 @@ module plasma -!dec$ define plasma_ = 0 +!!! dec $ define plasma_ = 0 +#define plasma_ 0 ! ! Simplistic type-agnostic PLASMA interface @@ -21,13 +22,15 @@ module plasma subroutine plasma_diag_dsytrdx(n,a,w,nroots,Ethres_) ! - !dec$ if (plasma_ > 0) + !!! dec $ if (plasma_ > 0) +#if plasma_ > 0 INCLUDE "plasmaf.h" integer(ik), parameter :: VEC = PlasmaVec integer(ik), parameter :: UPLO = PlasmaLower EXTERNAL PLASMA_DSYTRDX INTEGER PLASMA_DSYTRDX - !dec$ end if + !!! dec $ end if +#endif ! integer , intent(in) :: n double precision, intent(inout) :: a(n,n) ! In: symmetric matrix to be diagonalized @@ -69,15 +72,21 @@ subroutine plasma_diag_dsytrdx(n,a,w,nroots,Ethres_) ! if (present(Ethres_)) Ethres = Ethres_ ! - !dec$ if (plasma_ == 0) + !!! dec $ if (plasma_ == 0) +#if plasma_ == 0 + !INCLUDE "plasmaf.h" write(out,"('Plasma is not activated, in plasma.f90 please set plasma_ to 1')") - !dec$ end if +#endif + !!! dec $ end if ! - !dec$ if (plasma_ > 0) + !!! dec $ if (plasma_ > 0) +#if plasma_ > 0 + INCLUDE "plasmaf.h" ! !call getsize(N,LDA,nprocs_) ! - !dec$ end if +#endif + !!! dec $ end if ! !$omp parallel private(tid) if (tid==0) then @@ -184,7 +193,9 @@ subroutine plasma_diag_dsytrdx(n,a,w,nroots,Ethres_) ! ! set up my PLASMA_DSYTRDX environmenta, then call the eigensolver ! - !dec$ if (plasma_ > 0) + !!! dec $ if (plasma_ > 0) +#if plasma_ > 0 + INCLUDE "plasmaf.h" ! call resetcore(corea,corec) CALL PRINTARGS(VEC, UPLO, N, LDA, LDQ, COREA, COREB, COREC, NB, IB) @@ -201,8 +212,8 @@ subroutine plasma_diag_dsytrdx(n,a,w,nroots,Ethres_) STOP END IF CALL USETPLASMAENV() - ! - !dec$ end if +#endif + !!! dec $ end if ! real_end = get_real_time() cpu_end = get_cpu_time () diff --git a/pot_xy2.f90 b/pot_xy2.f90 index ee15188..58b93e2 100644 --- a/pot_xy2.f90 +++ b/pot_xy2.f90 @@ -2101,7 +2101,7 @@ function MLpoten_xy2_dmbe(ncoords,natoms,local,xyz,force) result(f) ! xcos = cos(alpha) ! - call potv(v,r12,r32,xcos) + !call potv(v,r12,r32,xcos) v = v*tocm ! !v = v + MLpoten_xy2_bubukina(ncoords,natoms,local,xyz,force) diff --git a/trove.f90 b/trove.f90 index 2984c26..e6b6149 100644 --- a/trove.f90 +++ b/trove.f90 @@ -16,6 +16,7 @@ module tp_module implicit none + public ptmain ! ! Defining the calculations ! @@ -26,7 +27,7 @@ module tp_module ! ! Here we do the TROVE calculations ! - subroutine pt + subroutine ptmain ! integer(ik) :: NPTorder,Natoms,Nmodes,Npolyads integer(ik) :: j @@ -216,17 +217,17 @@ subroutine pt write(out,"(/'End of TROVE')") endif ! - end subroutine pt + end subroutine ptmain end module tp_module ! program driver ! - !use tp_module + use tp_module !call ompaffinity() - call pt + call ptmain end program driver From 8540f5657ffcba11b24c952241df64398d9f5107 Mon Sep 17 00:00:00 2001 From: IBeArjen Date: Tue, 5 Feb 2019 12:06:19 +0000 Subject: [PATCH 02/79] Merge branches + fix some string formats, memory bug --- diag.f90 | 144 ++++++++++++++++------------------------------- dipole.f90 | 25 ++++---- fields.f90 | 32 ++++++----- lapack.f90 | 107 ++++++++++++----------------------- makefile | 2 +- perturbation.f90 | 95 +++++++++++++++++++------------ pot_xy2.f90 | 1 + pot_zxy2.f90 | 6 +- pot_zxy3.f90 | 3 +- refinement.f90 | 94 +++++++++++++++---------------- symmetry.f90 | 2 +- tran.f90 | 4 +- 12 files changed, 230 insertions(+), 285 deletions(-) diff --git a/diag.f90 b/diag.f90 index fadcbf4..90041cb 100644 --- a/diag.f90 +++ b/diag.f90 @@ -1,10 +1,10 @@ module diag -!dec$ define arpack_ = 0 -!dec$ define blacs_ = 0 -!dec$ define mpi_ = 0 -!dec$ define omparpack_ = 0 -!dec$ define propack_ = 0 +#define arpack_ 0 +#define blacs_ 0 +#define mpi_ 0 +#define omparpack_ 0 +#define propack_ 0 ! ! Simplistic type-agnostic LAPACK interface @@ -2089,20 +2089,17 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) ! has been exceeded. ! - !dec$ if (arpack_ > 0) -#if arpack_ > 0 +#if (arpack_ > 0) ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) #else - !dec$ else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! #endif - !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then ! ! @@ -2138,8 +2135,7 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) ! rvec = .true. ! - !dec$ if (arpack_ > 0) -#if arpack_ > 0 +#if (arpack_ > 0) ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2153,13 +2149,11 @@ subroutine diag_dseupd(n,bterm,nroots,factor,maxitr_,tol,h,e) ! if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !dec$ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !dec$ end if #endif ! if ( ierr < 0 ) then @@ -2446,13 +2440,10 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) myid = 1 nprow = 1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) myid= myprow -#endif - !dec$ elseif (mpi_ > 0) -#ifdef mpi_ +#elif (mpi_ > 0) call MPI_COMM_RANK( comm, myid, ierr ) call MPI_COMM_SIZE( comm, nprocs_, ierr ) if (nprocs_/=nprocs) then @@ -2460,7 +2451,6 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) stop 'matvec_p: inconsistent number of nprocs s' endif #endif - !dec$ end if ! kend = kstart(myid) + nloc-1 ! @@ -2490,16 +2480,12 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, iprev, mypcol ) #endif - !dec$ end if - !dec$ if (mpi_ > 0) -#ifdef mpi_ +#if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,myid,comm,ierr) #endif - !dec$ end if ! istart = max(istart,bterm(k,1)) ! @@ -2513,16 +2499,12 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call dgesd2d( comm, nx, 1, z(istart:iend), nx, iprev, mypcol ) - !dec$ end if - !dec$ if (mpi_ > 0) #endif -#ifdef mpi_ +#if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,iprev,comm,ierr) #endif - !dec$ end if ! enddo ! @@ -2539,16 +2521,12 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, inext, mypcol ) #endif - !dec$ end if - !dec$ if (mpi_ > 0) -#ifdef mpi_ +#if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,myid,comm,ierr) #endif - !dec$ end if ! iend = min(bterm(k,2),iend) ! @@ -2562,16 +2540,12 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call dgesd2d( comm, nx, 1, z(istart:iend), nx, inext, mypcol ) #endif - !dec$ end if - !dec$ if (mpi_ > 0) -#ifdef mpi_ +#if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,inext,comm,ierr) #endif - !dec$ end if ! enddo ! @@ -2701,36 +2675,34 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) write(out,"('Start PARPACK-diagonalization')") - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) write(out,"('BLAS-PINFO-start')") call BLACS_PINFO( iam, nprocs ) print *,nprocs blacs_or_mpi = 'BLACS' #endif - !dec$ end if ! write(out,"('BLAS-PINFO-done')") ! !call BLACS_PINFO( iam, nprocs ) !print *,nprocs - !dec$ if (mpi_ > 0) - !!call MPI_INIT( ierr ) - !!comm = MPI_COMM_WORLD - !!call MPI_COMM_RANK( comm, myid, ierr ) - !!call MPI_COMM_SIZE( comm, nprocs, ierr ) - !!! - !!print *,comm,myid,nprocs - !!! - !!if (trim(blacs_or_mpi)=='BLACS') then - !! write(out,"('diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") - !! stop 'diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' - !!endif - !!! - !!blacs_or_mpi = 'MPI' +#if (mpi_ > 0) + call MPI_INIT( ierr ) + comm = MPI_COMM_WORLD + call MPI_COMM_RANK( comm, myid, ierr ) + call MPI_COMM_SIZE( comm, nprocs, ierr ) ! - !dec$ end if + print *,comm,myid,nprocs + ! + if (trim(blacs_or_mpi)=='BLACS') then + write(out,"('diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") + stop 'diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' + endif + ! + blacs_or_mpi = 'MPI' + ! +#endif ! @@ -2739,11 +2711,9 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! if (nprocs .lt. 1) then nprocs = 1 - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call BLACS_SETUP( iam, nprocs ) #endif - !dec$ end if ! print *,nprocs ! @@ -2832,8 +2802,7 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! myprow = 1 ; mypcol = 1 ; myid = 1 ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call BLACS_GET( 0, 0, comm ) call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) @@ -2841,7 +2810,6 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) write(out,"(' myprow, nprow, mypcol, npcol = ',4i8)") myprow, nprow, mypcol, npcol ! #endif - !dec$ end if ! if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') then write(out,"('myprow, nprow, mypcol, npcol, nprocs = ',5i8)") myprow, nprow, mypcol, npcol, nprocs @@ -2910,22 +2878,19 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! has been exceeded. ! - !dec$ if (blacs_ > 0.or.mpi_ > 0) -#ifdef blacs_ +#if (blacs_ > 0 || mpi_ > 0) ! call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! - !dec$ else -#else +#else ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! #endif - !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then ! ! @@ -2959,8 +2924,7 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! rvec = .true. ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2972,14 +2936,12 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! -#else - !dec$ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! #endif - !dec$ end if ! if ( ierr < 0 ) then write(out,"(/'Error with_seupd, info = ',i8)") ierr @@ -3027,17 +2989,13 @@ subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) ! 9000 continue ! - !dec$ if (blacs_ > 0) -#ifdef blacs_ +#if (blacs_ > 0) call BLACS_GRIDEXIT ( comm ) call BLACS_EXIT(0) #endif - !dec$ end if - !dec$ if (mpi_ > 0) -#ifdef mpi_ +#if (mpi_ > 0) call MPI_FINALIZE(rc) #endif - !dec$ end if deallocate(v,workl,workd,d,resid,select,mv_buf) @@ -3184,20 +3142,17 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! has been exceeded. ! - !dec$ if (omparpack_ > 0) -#ifdef omparpack_ +#if (omparpack_ > 0) ! call dsaupd_ ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) - !dec$ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! #endif - !dec$ end if ! !if (verbose>=4.and.iparam(5)>0) then ! ! @@ -3233,8 +3188,7 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! rvec = .true. ! - !dec$ if (omparpack_ > 0) -#ifdef omparpack_ +#if (omparpack_ > 0) ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -3248,14 +3202,12 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !dec$ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! #endif - !dec$ end if ! if ( ierr < 0 ) then write(out,"(/'Error with_seupd, info = ',i8)") ierr @@ -3348,8 +3300,7 @@ subroutine diag_propack(n,bterm,nroots,factor,maxiter,iverbose,tol,h,e) if (iverbose>=2) call TimerStart('diag_propack: diagonalization') ! ! - !dec$ if (propack_ < 1) -#ifndef propack_ +#if (propack_ < 1) ! write(out,'("PROPACK is not activated!")') stop 'PROPACK is not activated!' @@ -3358,7 +3309,6 @@ subroutine diag_propack(n,bterm,nroots,factor,maxiter,iverbose,tol,h,e) return ! #endif - !dec$ endif ! nev = nroots ! diff --git a/dipole.f90 b/dipole.f90 index 5b60570..bb942f2 100644 --- a/dipole.f90 +++ b/dipole.f90 @@ -1,6 +1,6 @@ module dipole - -!dec$ define dipole_debug = 0 ! set dipole_debug > 2 with small expansions only +! set dipole_debug > 2 with small expansions only +#define dipole_debug 0 ! 0 - none ! 1 - some checkings only (no printing) ! 2 - minimal printing @@ -876,10 +876,10 @@ subroutine dm_intensity_symmvec(Jval) ! enddo ! - !dec$ if (dipole_debug >= 0) +#if (dipole_debug >= 0) write(out,"(' Total number of lower states = ',i8)") nlevelI write(out,"(' Total number of transitions = ',i10)") Ntransit - !dec$ end if +#endif ! ! ! In order to speed up the line strength evaluation, @@ -3571,10 +3571,10 @@ subroutine dm_intensity(Jval) ! write(out,my_fmt) 'Number of states for each symm = ',nlevelsG(:) ! - !dec$ if (dipole_debug >= 0) +#if (dipole_debug >= 0) write(out,"(' Total number of lower states = ',i8)") nlevelI write(out,"(' Total number of transitions = ',i8)") Ntransit - !dec$ end if +#endif ! ! ! In order to speed up the line strength evaluation, @@ -3766,11 +3766,11 @@ subroutine dm_intensity(Jval) ! !if (ndegI>1) ideg_range = mod(idegI,2)+1 ! - !dec$ if (dipole_debug >=3) + !#if (dipole_debug >=3) ! ! ! ideg_range(1) = 1; ideg_range(2) = ndegF ! ! - !dec$ end if + !#endif ! if (job%rotsym_do) then ! @@ -3962,11 +3962,11 @@ subroutine dm_intensity(Jval) !! !if (ndegI>1) ideg_range = mod(idegI,2)+1 !! - !!dec$ if (dipole_debug >=3) + !#if (dipole_debug >=3) ! ! ! ideg_range(1) = 1; ideg_range(2) = ndegF ! ! - !!dec$ end if + !#endif ! ! swtich to a reduced C3v/D3h case by commenting the next loop ! @@ -5173,7 +5173,8 @@ subroutine do_1st_half_linestrength_rotsym(jI,jF,indI,indF,cdimenI,dimenF,icoeff ! !loop over final state basis components ! - !$omp parallel do private(irootF,icontrF,irlevelF,irdegF,cirootI,irootI,icontrI,irlevelI,irdegI,f_w,ls) shared(half_ls) schedule(guided) + !$omp parallel do private(irootF,icontrF,irlevelF,irdegF,cirootI,irootI,icontrI,irlevelI,irdegI,f_w,ls) shared(half_ls)& + !$omp& schedule(guided) loop_F : do irootF = 1, dimenF ! icontrF = bset_contr(indF)%iroot_correlat_j0(irootF) @@ -5938,4 +5939,4 @@ subroutine convert_symvector_to_contrvector(indI,dimenI,igammaI,idegI,ijterm,sym end subroutine convert_symvector_to_contrvector -end module dipole \ No newline at end of file +end module dipole diff --git a/fields.f90 b/fields.f90 index 2e94967..d16912f 100644 --- a/fields.f90 +++ b/fields.f90 @@ -1781,7 +1781,7 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) select case (trim(job%bset(imode)%type)) ! case ('NUMEROV','BOX','LAGUERRE','FOURIER','LEGENDRE') - case default + case default ! job%bset(imode)%coord_kinet = job%bset(imode)%type job%bset(imode)%coord_poten = job%bset(imode)%type @@ -1935,13 +1935,13 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) ! do i=1,Nmodes ! - if ((job%bset(i)%type ==job%bset(i-1)%type ).and.(job%bset(i)%dim ==job%bset(i-1)%dim ).and.& - (job%bset(i)%coord_kinet==job%bset(i-1)%coord_kinet).and.(job%bset(i)%coord_poten==job%bset(i-1)%coord_poten).and.& - (job%bset(i)%class ==job%bset(i-1)%class ).and.(job%bset(i)%dvrpoints ==job%bset(i-1)%dvrpoints ).and.& - (job%bset(i)%range(1) ==job%bset(i-1)%range(1) ).and.(job%bset(i)%range(2) ==job%bset(i-1)%range(2) ).and.& - (job%bset(i)%borders(1) ==job%bset(i-1)%borders(1) ).and.(job%bset(i)%borders(2) ==job%bset(i-1)%borders(2) ).and.& - (job%bset(i)%res_coeffs ==job%bset(i-1)%res_coeffs ).and.(job%bset(i)%npoints ==job%bset(i-1)%npoints ).and.& - (job%bset(i)%periodic.eqv.job%bset(i-1)%periodic ).and.(job%bset(i)%iperiod ==job%bset(i-1)%iperiod )) then + if (job%bset(i)%type ==job%bset(i-1)%type .and.job%bset(i)%dim ==job%bset(i-1)%dim.and.& + job%bset(i)%coord_kinet==job%bset(i-1)%coord_kinet.and.job%bset(i)%coord_poten==job%bset(i-1)%coord_poten.and.& + job%bset(i)%class ==job%bset(i-1)%class .and.job%bset(i)%dvrpoints ==job%bset(i-1)%dvrpoints.and.& + job%bset(i)%range(1) ==job%bset(i-1)%range(1) .and.job%bset(i)%range(2) ==job%bset(i-1)%range(2).and.& + job%bset(i)%borders(1) ==job%bset(i-1)%borders(1) .and.job%bset(i)%borders(2) ==job%bset(i-1)%borders(2).and.& + job%bset(i)%res_coeffs ==job%bset(i-1)%res_coeffs .and.job%bset(i)%npoints ==job%bset(i-1)%npoints .and.& + (job%bset(i)%periodic.eqv.job%bset(i-1)%periodic) .and.job%bset(i)%iperiod ==job%bset(i-1)%iperiod ) then ! job%bset(i)%species = ispecies else @@ -4359,7 +4359,7 @@ subroutine FLsetMolecule ! trove%bonds(1:Nbonds,:) = bonds(1:Nbonds,:) trove%angles(1:Nangles,:) = angles(1:Nangles,:) - trove%dihedrals(:,:) = dihedrals(:,:) + trove%dihedrals(0:Ndihedrals,:) = dihedrals(0:Ndihedrals,:) trove%dihedtype(:) = dihedtype(:) ! ! We define the coordinates @@ -5081,7 +5081,8 @@ subroutine zmat_to_bonds(bonds,angles,dihedrals,dihedtype,Nbonds,Nangles,Ndihedr ! if (all(zeta/=(/1,2,3/))) then ! - write (out,"('zmat_to_bonds: illegal zeta = ',i4,' of 3d dihedral for the linear angle of the atom ',i4,' ')") kappa,iatom + write (out,"('zmat_to_bonds: illegal zeta = ',i4,' of 3d dihedral for the linear angle of the atom ',i4,' ')") & + kappa,iatom stop 'zmat_to_bonds - illegal zeta' ! endif @@ -7026,7 +7027,8 @@ subroutine FLinitilize_Potential_II if (trim(trove%IO_hamiltonian)=='READ'.or.& trim(trove%IO_potential)=='READ') then ! - if (trim(trove%IO_kinetic)/='READ'.and.trim(trove%IO_hamiltonian)/='READ'.and..not.trove%separate_store) call FLcheck_point_Hamiltonian('KINETIC_SKIP') + if (trim(trove%IO_kinetic)/='READ'.and.trim(trove%IO_hamiltonian)/='READ'.and..not.trove%separate_store) & + call FLcheck_point_Hamiltonian('KINETIC_SKIP') ! call FLcheck_point_Hamiltonian('POTENTIAL_READ') ! @@ -7334,7 +7336,8 @@ subroutine FLinitilize_Potential if (trim(trove%IO_hamiltonian)=='READ'.or.& trim(trove%IO_potential)=='READ') then ! - if (trim(trove%IO_kinetic)/='READ'.and.trim(trove%IO_hamiltonian)/='READ'.and..not.trove%separate_store) call FLcheck_point_Hamiltonian('KINETIC_SKIP') + if (trim(trove%IO_kinetic)/='READ'.and.trim(trove%IO_hamiltonian)/='READ'.and..not.trove%separate_store) & + call FLcheck_point_Hamiltonian('KINETIC_SKIP') ! call FLcheck_point_Hamiltonian('POTENTIAL_READ') ! @@ -13275,7 +13278,8 @@ subroutine basisRestore read(chkptIO) Tcoeff ! if (fl%Ncoeff/= Tcoeff) then - write (out,"(' Checkpoint file ',a,': Ncoeff (basis) in g_vib disagree with ncoeff of field',2i4,1x,2I8)") k1,k2,fl%Ncoeff,Tcoeff + write (out,"(' Checkpoint file ',a,': Ncoeff (basis) in g_vib disagree with ncoeff of field',2i4,1x,2I8)") & + k1,k2,fl%Ncoeff,Tcoeff write (out,"('Consider switching BASIS_SET SAVE')") stop 'check_point_Hamiltonian - Ncoeff (basis) in g_vib disagree with ncoeff of field' end if @@ -15473,7 +15477,7 @@ subroutine fingerprintWrite write(chkptIO,"(i8,' <- Jrot, rotational angular momentum')") bset%dscr(0)%range(1) ! do imode = 0,trove%Nmodes - write(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i2,1x,i2,1x,a10,i9,i3,i3,i3)") & + write(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,l,1x,i2,1x,a10,i9,l,l,l)") & imode, bset%dscr(imode) enddo ! diff --git a/lapack.f90 b/lapack.f90 index 46ba975..b52b0e0 100644 --- a/lapack.f90 +++ b/lapack.f90 @@ -1,9 +1,9 @@ module lapack -!!! dec $ define arpack_ = 0 -!!! dec $ define blacs_ = 0 -!!! dec $ define mpi_ = 0 -!!! dec $ define omparpack_ = 0 +#define arpack_ 0 +#define blacs_ 0 +#define mpi_ 0 +#define omparpack_ 0 ! ! Simplistic type-agnostic LAPACK interface @@ -1301,21 +1301,18 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) nev2 = nev rnorm = 1e-5 ! - !!! dec $ if (arpack_ > 0) -#ifdef arpack_ +#if (arpack_ > 0) ! call dsaupd( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info) !,np, rnorm, nconv, nev2 ) ! -#else - !!! dec $ else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! #endif - !!! dec $ end if ! ido = -1 ! @@ -1333,19 +1330,16 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! has been exceeded. ! - !!! dec $ if (arpack_ > 0) -#ifdef arpack_ +#if (arpack_ > 0) ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info) !,np, rnorm, nconv, nev2) - !!! dec $ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !!! dec $ end if #endif ! !if (verbose>=4.and.iparam(5)>0) then @@ -1407,8 +1401,7 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! rvec = .true. ! - !!! dec $ if (arpack_ > 0) -#ifdef arpack_ +#if (arpack_ > 0) ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -1422,13 +1415,11 @@ subroutine dseupd_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e,IO) ! if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !!! dec $ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !!! dec $ end if #endif ! if ( ierr < 0 ) then @@ -1548,18 +1539,16 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) myid = 1 nprow = 1 ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) myid= myprow - !!! dec $ elseif (mpi_ > 0) +#elif (mpi_ > 0) call MPI_COMM_RANK( comm, myid, ierr ) call MPI_COMM_SIZE( comm, nprocs_, ierr ) if (nprocs_/=nprocs) then write(out,"('matvec_p: inconsistent number of nprocs = ',2i8)") nprocs_,nprocs stop 'matvec_p: inconsistent number of nprocs s' endif - !!! dec $ end if #endif ! kend = kstart(myid) + nloc-1 @@ -1590,13 +1579,11 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, iprev, mypcol ) - !!! dec $ end if - !!! dec $ if (mpi_ > 0) +#endif +#if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,myid,comm,ierr) - !!! dec $ end if #endif ! istart = max(istart,bterm(k,1)) @@ -1611,13 +1598,11 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call dgesd2d( comm, nx, 1, z(istart:iend), nx, iprev, mypcol ) - !!! dec $ end if - !!! dec $ if (mpi_ > 0) +#endif +#if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,iprev,iprev,comm,ierr) - !!! dec $ end if #endif ! enddo @@ -1635,13 +1620,11 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call dgerv2d( comm, nx, 1, mv_buf(istart:iend), nx, inext, mypcol ) - !!! dec $ end if - !!! dec $ if (mpi_ > 0) +#endif +#if (mpi_ > 0) call mpi_recv(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,myid,comm,ierr) - !!! dec $ end if #endif ! iend = min(bterm(k,2),iend) @@ -1656,13 +1639,11 @@ subroutine matvec_p(comm,n,nloc,nprocs,kstart,dx,bterm,h,mv_buf,z,w) ! nx = iend-istart+1 ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call dgesd2d( comm, nx, 1, z(istart:iend), nx, inext, mypcol ) - !!! dec $ end if - !!! dec $ if (mpi_ > 0) +#endif +#if (mpi_ > 0) call mpi_send(mv_buf(istart),nx,MPI_DOUBLE_PRECISION,inext,inext,comm,ierr) - !!! dec $ end if #endif ! enddo @@ -1789,13 +1770,12 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! %-----------------------% ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call BLACS_PINFO( iam, nprocs ) blacs_or_mpi = 'BLACS' - !!! dec $ end if +#endif - !!! dec $ if (mpi_ > 0) +#if (mpi_ > 0) call MPI_INIT( ierr ) comm = MPI_COMM_WORLD call MPI_COMM_RANK( comm, myid, ierr ) @@ -1810,9 +1790,8 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! blacs_or_mpi = 'MPI' ! - !!! dec $ end if -#endif ! +#endif ! @@ -1820,10 +1799,8 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! if (nprocs .lt. 1) then nprocs = 1 - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call BLACS_SETUP( iam, nprocs ) - !!! dec $ end if #endif endif if (nprocs >maxnprocs) stop 'nprocs > maxnprocs' @@ -1909,12 +1886,10 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! ! myprow = 1 ; mypcol = 1 ; myid = 1 - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call BLACS_GET( 0, 0, comm ) call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) - !!! dec $ end if #endif ! if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') write(out,"('myprow, nprow, mypcol, npcol, nprocs = ',5i8)") & @@ -1982,21 +1957,18 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! has been exceeded. ! - !!! dec $ if (blacs_ > 0.or.mpi_ > 0) -#ifdef arpack_ +#if (blacs_ > 0 || mpi_ > 0) ! call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! - !!! dec $ else -#else +#else ! call dsaupd ( ido, bmat, n, which, nev, tol, resid, & ncv, v, ldv, iparam, ipntr, workd, workl, & lworkl, info ) ! - !!! dec $ end if #endif ! !if (verbose>=4.and.iparam(5)>0) then @@ -2031,8 +2003,7 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! rvec = .true. ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) ! if (verbose>=5) write(out,"(/'Arpack: dseupd')") ! @@ -2044,13 +2015,11 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) if (verbose>=5) write(out,"(/'Arpack: done!')") ! - !!! dec $ else -#else +#else ! write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") stop 'Arpack was not activated' ! - !!! dec $ end if #endif ! if ( ierr < 0 ) then @@ -2099,14 +2068,12 @@ subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) ! 9000 continue ! - !!! dec $ if (blacs_ > 0) -#ifdef arpack_ +#if (blacs_ > 0) call BLACS_GRIDEXIT ( comm ) call BLACS_EXIT(0) - !!! dec $ end if - !!! dec $ if (mpi_ > 0) +#endif +#if (mpi_ > 0) call MPI_FINALIZE(rc) - !!! dec $ end if #endif diff --git a/makefile b/makefile index 31fda26..05dd386 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ pot_user = pot_ch4 PLAT = _2205_i17 ###FOR = ifort FOR = mpif90 -FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer +FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a diff --git a/perturbation.f90 b/perturbation.f90 index f211e54..9e5c415 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -752,8 +752,7 @@ subroutine PTactive_space_init(bs_t) endif ! if (job%verbose>=2) then - write(my_fmt,'(a,i0,a)') "(a,",Nmodes,"i5)" - !write(out) + write(my_fmt,'(a,i0,a)') "(/a,",Nmodes,"i5)" write(out,my_fmt) 'Polyads estimated from range: ',(imode,imode=1,Nmodes) write(out,my_fmt) ' -> ',(pol_t(imode),imode=1,Nmodes) write(out,"(/'Adjusted (input) maximal polyad is: ',i5,'(',i5,')')") PT%Polyad_max,PT%Npolyads @@ -1079,8 +1078,7 @@ subroutine PTactive_space_init(bs_t) endif ! if (job%verbose>=4) then - write(my_fmt,'(a,i0,a)') "(a,",NPTorder1,"i8)" - !write(out) + write(my_fmt,'(a,i0,a)') "(/a,",NPTorder1,"i8)" write(out,my_fmt) 'RangeOrder vs order ',(iorder ,iorder=0,NPTorder) write(out,my_fmt) 'RangeOrder -> ',(PT%RangeOrder(iorder),iorder=0,NPTorder) endif @@ -1094,8 +1092,7 @@ subroutine PTactive_space_init(bs_t) ! Be verbose ! ! if (job%verbose>=4) then - write(my_fmt,'(a,i0,a)') "(a,",Npolyads1,"i8)" - !write(out) + write(my_fmt,'(a,i0,a)') "(/a,",Npolyads1,"i8)" write(out,my_fmt) 'MaxIndex_nu vs polyad ',(ipol,ipol=0,Npolyads) write(out,my_fmt) ' -> ',(PT%MaxIndex_nu(ipol),ipol=0,Npolyads) endif @@ -3106,7 +3103,8 @@ subroutine PTcontracted_prediagonalization(j) ! fv = 0 ! - !$omp parallel do private(k,nu,f_prim,i,imode,ispecies,xval,ipoint_t,v,r_t,func_t,fval,df_t,kroot) shared(fv) schedule(dynamic) reduction(max:info) + !$omp parallel do private(k,nu,f_prim,i,imode,ispecies,xval,ipoint_t,v,r_t,func_t,fval,df_t,kroot) & + !$omp& shared(fv) schedule(dynamic) reduction(max:info) do k = 1,dimen ! nu(:) = PT%active_space%icoeffs(:,k) @@ -8597,7 +8595,8 @@ recursive subroutine calc_symm_mat_element_vector_contr(jrot,irow,ijterm,func,hs ! isize = PT%Index_deg(irow)%size1 ! - !$omp parallel do private(jrow,cnu_j,jsize,ideg,deg_i,jdeg,deg_j,icontr,jcontr,hcontr) shared(hsym) schedule(dynamic) + !$omp parallel do private(jrow,cnu_j,jsize,ideg,deg_i,jdeg,deg_j,icontr,jcontr,hcontr) shared(hsym) & + !$omp& schedule(dynamic) do jrow = 1,irow ! cnu_j(:) = PT%contractive_space(:,jrow) @@ -8660,7 +8659,8 @@ recursive subroutine calc_symm_mat_element_vector_contr_k(jrot,irow,ijterm,func, ! isize = PT%Index_deg(irow)%size1 ! - !$omp parallel do private(jrow,cnu_j,jsize,ideg,deg_i,jdeg,deg_j,icontr,jcontr,k_i,k_j,tau_i,tau_j,hcontr) shared(hsym) schedule(dynamic) + !$omp parallel do private(jrow,cnu_j,jsize,ideg,deg_i,jdeg,deg_j,icontr,jcontr,k_i,k_j,tau_i,tau_j,hcontr) & + !$omp& shared(hsym) schedule(dynamic) do jrow = 1,irow ! cnu_j(:) = PT%contractive_space(:,jrow) @@ -8849,7 +8849,8 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roo integer(hik) :: ndvr_ktau,matsize integer(ik) :: nelem,chkptIO integer(ik) :: dimen_maxrow,m_,i_,j_,kmax,jb - character(len=cl) :: my_fmt !format for I/O specification + character(len=cl) :: my_fmt !format for I/O specification + character(len=wl) :: my_fmt_l !format for long I/O specification ! ! Check for the trivial solution if (dimen_s<=0) return @@ -10023,6 +10024,14 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roo nroots_ = 0 cdimenmax = 1 ! + write(my_fmt_l,'(a,i0,a,i0,a,i0,a,i0,a)') "(2x,a,i7,f14.6,3x,a1,a4,a1,3i3,a2,1x,a1,",Nclasses,"(1x,a3),a1,",& + Nmodes,"i4,a2,1x,f9.2,1x,a1,",Nmodes+1,"i4,a2,1x,a1,",Nclasses,"i4,a2)" + + !write(out,'(2x,a,i7,f14.6,3x,a1,a4,a1,3i3,a2,1x,a1'//fmt%Aclasses//',a1,'//fmt%Nmodes0//',a2,1x,f9.2,1x,a1,'//fmt%Nmodes//'," )",1x,"(",'//fmt%Nclasses0//',a2)') & + ! sym%label(gamma),iroot,termvalue,& + ! "(",cgamma(0),";",jrot,k,tau," )", & + ! "(",cgamma(1:PT%Nclasses),";",nu(1:PT%Nmodes)," )",maxcontrib(iroot)**2,"(",normal(1:PT%Nmodes),normal(0),cnu(1:PT%Nclasses)," )" + ! do iroot=1,nroots ! termvalue = energy(iroot)-ZPE @@ -10080,14 +10089,20 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roo nu(1:PT%Nmodes)," )",maxcontrib(iroot)**2 ! else - ! - write(out,'(2x,a,i7,f14.6,3x,a1,a4,a1,3i3,a2,1x,a1,'//fmt%Aclasses//',a1,'//fmt%Nmodes0//',& - &a2,1x,f9.2,1x,a1,'//fmt%Nmodes//',a2,1x,a1,'//fmt%Nclasses0//',a2)') & - sym%label(gamma),iroot,termvalue,"(",& - cgamma(0),";",jrot,k,tau," )", & - "(",cgamma(1:PT%Nclasses),";", & - nu(1:PT%Nmodes)," )",maxcontrib(iroot)**2,"(",normal(1:PT%Nmodes)," )",& - "(",normal(0),cnu(1:PT%Nclasses)," )" + !write(out,'(2x,a,i7,f14.6,3x,"(",a4,";",3i3," )",1x,"("'//fmt%Aclasses//',";",'//fmt%Nmodes0//'," )",1x,f9.2,1x,"(",'//fmt%Nmodes//'," )",1x,"(",'//fmt%Nclasses0//'," )")') & + ! sym%label(gamma),iroot,termvalue,& + ! cgamma(0),jrot,k,tau, & + ! cgamma(1:PT%Nclasses), & + ! nu(1:PT%Nmodes),maxcontrib(iroot)**2,normal(1:PT%Nmodes),normal(0),cnu(1:PT%Nclasses) + + write(out,my_fmt_l)& + !write(out,'(2x,a,i7,f14.6,3x,a1,a4,a1,3i3,a2,1x,a1'//fmt%Aclasses//',a1,'//fmt%Nmodes0//',a2,1x,f9.2,1x,a1,'//fmt%Nmodes//',a2,1x,a1,'//fmt%Nclasses0//',a2)') & + sym%label(gamma),iroot,termvalue,& + "(",cgamma(0),";",jrot,k,tau," )", & + "(",cgamma(1:PT%Nclasses),";",nu(1:PT%Nmodes)," )",maxcontrib(iroot)**2,& + "(",normal(1:PT%Nmodes),normal(0)," )","(",cnu(1:PT%Nclasses)," )" + + ! endif ! @@ -10228,7 +10243,8 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roo ! if (job%IOvector_symm) ilevel = iroot ! - write(my_fmt,'(a,i0,a,i0,a,i0,a)') "(i8,3i8,f20.12,i8,",nmodes,"i4,i8,",Nclasses1,"i4,i8,",Nmodes,"i4,2x,f17.8,",Nclasses,"i7)" + write(my_fmt,'(a,i0,a,i0,a,i0,a,i0,a)') "(i8,3i8,f20.12,i8,",nmodes,"i4,i8,",& + Nclasses1,"i4,i8,",Nmodes,"i4,2x,f17.8,",Nclasses,"i7)" ! !write(IOunit_quanta,'(i8,3i8,f20.12,i8,i4,i8,i4,i8,i4,2x,f17.8,i7)') irecord,& write(IOunit_quanta,my_fmt) irecord,gamma,ilevel,kdeg,energy(iroot),eignu(iroot,0:PT%Nmodes),ilarge_coef_t,& @@ -14671,7 +14687,8 @@ subroutine PTcontracted_matelem_class(jrot) call ArrayStart('PTcontracted_matelem_cl: icoeff2iroot',alloc,1,ik,matsize) call ArrayStart('PTcontracted_matelem_cl: icoeff2iroot',alloc,size(icoefficoeff1),ik) ! - !$omp parallel do private(icoeff,icase,ilambda,iclasses,ideg,ilevel,iroot) shared(icoeff2iroot,icoefficoeff1) schedule(dynamic) + !$omp parallel do private(icoeff,icase,ilambda,iclasses,ideg,ilevel,iroot) shared(icoeff2iroot,icoefficoeff1) & + !$omp& schedule(dynamic) do icoeff=1,PT%Maxcontracts ! icoefficoeff1(icoeff) = int(icoeff*(icoeff-1),hik)/2 @@ -17714,8 +17731,8 @@ subroutine calc_rot_contr_matrix_II(isymcoeff,fl,grot) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,matelem,iclass,nu_i,nu_j,icoeff) shared(grot,uniqu_trans) - ! + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,matelem,iclass,nu_i,nu_j,icoeff) & + !$omp& shared(grot,uniqu_trans) allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then write (out,"(' Error ',i9,' trying to allocate array grot: me_class0_vec')") info_p @@ -17800,8 +17817,8 @@ subroutine calc_gcor_contr_matrix_II(k1,k2,isymcoeff,gcor) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,iclass,nu_i,nu_j,matelem,icoeff) shared(gcor,uniqu_trans) - ! + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,iclass,nu_i,nu_j,matelem,icoeff) & + !$omp& shared(gcor,uniqu_trans) allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then write (out,"(' Error ',i9,' trying to allocate array gcor: me_class0_vec')") info_p @@ -18022,7 +18039,8 @@ subroutine calc_rot_contr_matrix_III(isymcoeff,fl,hvib) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(nsize,mat1,mat2,info_p,jcontr,jsymcoeff,jsym,energy_j,nu_i,nu_j,iclass,iterm,icoeff,iuniq,matelem) shared(hvib) + !$omp parallel private(nsize,mat1,mat2,info_p,jcontr,jsymcoeff,jsym,energy_j,nu_i,nu_j,iclass,iterm,icoeff,iuniq,matelem) & + !$omp& shared(hvib) nsize = fl%icoeff(PT%Nclasses-1)%isize(1) ! allocate(mat1(nsize),mat2(nsize),stat=info_p) @@ -18165,7 +18183,8 @@ subroutine calc_vib_contr_matrix_III(isymcoeff,fl,hvib) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(nsize,mat1,mat2,info_p,jcontr,jsymcoeff,jsym,energy_j,nu_i,nu_j,iclass,iterm,icoeff,iuniq,matelem) shared(hvib) + !$omp parallel private(nsize,mat1,mat2,info_p,jcontr,jsymcoeff,jsym,energy_j,nu_i,nu_j,iclass,iterm,icoeff,iuniq,matelem)& + !$omp& shared(hvib) nsize = fl%icoeff(PT%Nclasses-1)%isize(1) ! allocate(mat1(nsize),mat2(nsize),stat=info_p) @@ -18365,8 +18384,8 @@ subroutine calc_vib_contr_matrix_II(isymcoeff,fl,hvib) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,matelem,icoeff) shared(hvib,uniqu_trans) - ! + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,matelem,icoeff) & + !$omp& shared(hvib,uniqu_trans) allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then write (out,"(' Error ',i9,' trying to allocate array gvib: me_class0_vec')") info_p @@ -18465,8 +18484,8 @@ subroutine calc_field_contr_matrix(isymcoeff,fl,hvib) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,iterm,matelem,icoeff) shared(hvib,uniqu_trans) - ! + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,iterm,matelem,icoeff) & + !$omp& shared(hvib,uniqu_trans) allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then write (out,"(' Error ',i9,' trying to allocate array gvib: me_class0_vec')") info_p @@ -18571,7 +18590,8 @@ subroutine calc_vpot_contr_matrix_II(isymcoeff,hvib) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,matelem,icoeff) shared(hvib,uniqu_trans) + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,jsym,iclass,nu_i,nu_j,matelem,icoeff) & + !$omp& shared(hvib,uniqu_trans) ! allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then @@ -18664,8 +18684,8 @@ subroutine calc_extF_contr_matrix_II(k1,isymcoeff,extF) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,iclass,nu_i,nu_j,matelem,icoeff) shared(extF,uniqu_trans) - ! + !$omp parallel private(me_class0_vec,info_p,jcontr,energy_j,jsymcoeff,iclass,nu_i,nu_j,matelem,icoeff) & + !$omp& shared(extF,uniqu_trans) allocate(me_class0_vec(Ncoeff,Nclasses),stat=info_p) if (info_p/=0) then write (out,"(' Error ',i9,' trying to allocate array extF: me_class0_vec')") info_p @@ -18851,7 +18871,8 @@ subroutine calc_extF_contr_matrix(k1,isymcoeff,extF) ! icontr = PT%icase2icontr(isymcoeff,ideg) ! - !$omp parallel do private(jcontr,energy_j,jsymcoeff,matelem,icoeff,iclass,nu_i,nu_j,iterm,me_class0) shared(hvib) schedule(dynamic) + !$omp parallel do private(jcontr,energy_j,jsymcoeff,matelem,icoeff,iclass,nu_i,nu_j,iterm,me_class0) & + !$omp& shared(hvib) schedule(dynamic) do jcontr=1,icontr ! if (debug_cut_matelem_with_enermax) then @@ -25220,7 +25241,8 @@ subroutine PTDVR_initialize(j,bs,iclass_,reduced_model_,ifread) !$omp parallel private(extF_t,extF_) allocate(extF_t(max(extF_rank,1)),extF_(max(extF_rank,1),-2:2)) ! - !$omp do private(idvrpoint,imode,ispecies,k,dchi,irho,poten_t,gvib_t,grot_t,gcor_t,xval,dchi_,i,irho_,x,poten_,gvib_,grot_,gcor_,fval,df_t,k1,k2,imu) + !$omp do private(idvrpoint,imode,ispecies,k,dchi,irho,poten_t,gvib_t,grot_t,gcor_t,xval,dchi_,i,irho_,x,poten_,gvib_,grot_,& + !$omp& gcor_,fval,df_t,k1,k2,imu) do idvrpoint = 1,size_total ! do imode = 1,PT%Nmodes @@ -25798,7 +25820,8 @@ subroutine PTDVR_contracted_basis_generate(j) ! enddo ! - !$omp parallel do private(iclass,im1,im2,nlevels,ic,ip,ilevel,level_degen,ideg,sum_f,sum_d,iprim,nu,imode,ispecies,mat_f,mat_d,f_prim) schedule(dynamic) + !$omp parallel do private(iclass,im1,im2,nlevels,ic,ip,ilevel,level_degen,ideg,sum_f,sum_d,iprim,nu,imode,& + !$omp& ispecies,mat_f,mat_d,f_prim) schedule(dynamic) do iclass = 1,PT%Nclasses ! im1 = PT%mode_class(iclass,1) diff --git a/pot_xy2.f90 b/pot_xy2.f90 index 58b93e2..88e304e 100644 --- a/pot_xy2.f90 +++ b/pot_xy2.f90 @@ -2102,6 +2102,7 @@ function MLpoten_xy2_dmbe(ncoords,natoms,local,xyz,force) result(f) xcos = cos(alpha) ! !call potv(v,r12,r32,xcos) + v = 0 v = v*tocm ! !v = v + MLpoten_xy2_bubukina(ncoords,natoms,local,xyz,force) diff --git a/pot_zxy2.f90 b/pot_zxy2.f90 index 0a76188..b73d59a 100644 --- a/pot_zxy2.f90 +++ b/pot_zxy2.f90 @@ -53,8 +53,10 @@ function ML_MEP_zxy2_rho_coeff(x) result(dst) ! cs=1.0_ark+cos(x) ! - dst(1) = molec%mep_params(1)+molec%mep_params(2)*cs+molec%mep_params(3)*cs**2+molec%mep_params(4)*cs**3+molec%mep_params( 5)*cs**4 - dst(2) = molec%mep_params(6)+molec%mep_params(7)*cs+molec%mep_params(8)*cs**2+molec%mep_params(9)*cs**3+molec%mep_params(10)*cs**4 + dst(1) = molec%mep_params(1)+molec%mep_params(2)*cs+molec%mep_params(3)*cs**2+molec%mep_params(4)*cs**3 & + +molec%mep_params( 5)*cs**4 + dst(2) = molec%mep_params(6)+molec%mep_params(7)*cs+molec%mep_params(8)*cs**2+molec%mep_params(9)*cs**3 & + +molec%mep_params(10)*cs**4 dst(3) = dst(2) dst(4) = molec%mep_params(11)*rad+molec%mep_params(12)*cs+molec%mep_params(13)*cs**2+molec%mep_params(14)*cs**3+& molec%mep_params(15)*cs**4 diff --git a/pot_zxy3.f90 b/pot_zxy3.f90 index 2f844ae..8463bf8 100644 --- a/pot_zxy3.f90 +++ b/pot_zxy3.f90 @@ -446,7 +446,8 @@ recursive subroutine MLdms2xyz_zxy3_sym(rank,ncoords,natoms,local,xyz0,f) beta413 = aacos(cosbeta,txt) ! if (abs(beta312+beta412+beta413-2.0_rk*pi)>sqrt(small_)) then - write(out,"('MLpoten_zxy3_sym: beta312+beta412+beta413/=2.0_rk*pi for t1,t2,t3',4f16.8)") beta312,beta412,beta413,beta312+beta412+beta413 + write(out,"('MLpoten_zxy3_sym: beta312+beta412+beta413/=2.0_rk*pi for t1,t2,t3',4f16.8)") beta312,beta412,beta413 & + ,beta312+beta412+beta413 stop endif ! diff --git a/refinement.f90 b/refinement.f90 index 4d5eaaf..dad12ce 100644 --- a/refinement.f90 +++ b/refinement.f90 @@ -1,8 +1,5 @@ module refinement -!dec$ define fit_debug = 1 - - use accuracy, only : ik, hik, rk, ark, cl, out, small_ use fields, only : manifold,job,fitting,j0fit,FLNmodes,FLindexQ,FLQindex,FL_fdf,FLpoten4xi,& FLfinitediffs_2d,FLpoten_linearized,analysis,action @@ -32,6 +29,7 @@ module refinement integer(ik), allocatable :: Jindex(:) ! integer(ik), pointer :: Jeigenvec_unit(:,:) + integer(ik),parameter :: fit_debug = 1 type coeffT real(rk),pointer :: coeff(:) @@ -498,9 +496,9 @@ subroutine sf_fitting(Jval) ! do i=1,pot_npts ! - !dec$ if (fit_debug > 6) + if (fit_debug > 6) then write (out,"('i = ',i0)") i - !dec$ end if + endif ! read (potunit,*) ar_t(1:molec%ncoords),pot_values(i),wtall(en_npts+i) local(:,i) = ar_t(:) @@ -648,10 +646,10 @@ subroutine sf_fitting(Jval) ! Only fitted energies are printed. ! write(out,"(/1X,100('-'),/a,/1X,100('-'))") & - '| ## | N | J | sym| Obs. | Calc. | Obs.-Calc. | Weight | K vib. quanta' + '|## | N | J | sym| Obs. | Calc. | Obs.-Calc. | Weight | K vib. quanta' ! write(enunit,"(/1X,100('-'),/a,/1X,100('-'))") & - '| ## | N | J | Sym| Obs. | Calc. | Obs.-Calc. | Weight | K quanta (Calc./Obs.) ' + '|## | N | J | Sym| Obs. | Calc. | Obs.-Calc. | Weight | K quanta (Calc./Obs.) ' ! do j = 1,jlistmax ! @@ -751,9 +749,9 @@ subroutine sf_fitting(Jval) ! mat(jentry,ientry) = mat(ientry,jentry) ! - !dec$ if (fit_debug > 3) + if (fit_debug > 3) then write (out,"('mat (',i0,',',i0,')= ',es14.7)") ientry,jentry,mat(ientry,jentry) - !dec$ end if + endif ! enddo ! @@ -774,12 +772,12 @@ subroutine sf_fitting(Jval) endif enddo ! - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then ! write (out,"(/'Smallest diag. value of mat = ',es14.7,'at k = ',i8,' upper_range = ',es14.7/)") & mat(k,k),k,job%upper_ener+mat(k,k) ! - !dec$ end if + endif ! if (allocated(energy_)) deallocate(energy_) ! @@ -1268,18 +1266,18 @@ subroutine sf_fitting(Jval) do icolumn=1,irow al(irow,icolumn)=sum(rjacob(1:npts,icolumn)*rjacob(1:npts,irow)*wtall(1:npts)) al(icolumn,irow)=al(irow,icolumn) - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then write (out,"('al (',i0,',',i0,')= ',es14.7)") irow,icolumn,al(irow,icolumn) - !dec$ end if + endif enddo enddo ! ! form B matrix do irow=1,numpar bl(irow)=sum(eps(1:npts)*rjacob(1:npts,irow)*wtall(1:npts)) - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then write (out,"('bl (',i0,')= ',es14.7)") irow,bl(irow) - !dec$ end if + endif enddo ! ! Two types of the linear solver are availible: @@ -1497,8 +1495,6 @@ subroutine sf_fitting(Jval) ! ! Print the potential energy points into a separate unit. ! - !dec$ if (fit_debug > 1) - !dec$ end if ! if (job%verbose>=6) call TimerReport ! @@ -2029,10 +2025,10 @@ subroutine bandcentres_fitting(Jval) ! Only fitted energies are printed. ! write(out,"(/1X,100('-'),/a,/1X,100('-'))") & - '| ## | N | J | sym| Obs. | Calc. | Obs.-Calc. | Weight | K vib. quanta' + '|## | N | J | sym| Obs. | Calc. | Obs.-Calc. | Weight | K vib. quanta' ! write(enunit,"(/1X,100('-'),/a,/1X,100('-'))") & - '| ## | N | J | Sym| Obs. | Calc. | Obs.-Calc. | Weight | K quanta (Calc./Obs.) ' + '|## | N | J | Sym| Obs. | Calc. | Obs.-Calc. | Weight | K quanta (Calc./Obs.) ' ! do j = 1,jlistmax ! @@ -2115,9 +2111,9 @@ subroutine bandcentres_fitting(Jval) ! mat(jentry,ientry) = mat(ientry,jentry) ! - !dec$ if (fit_debug > 3) - write (out,"('mat (',i0,',',i0,')= ',es14.7)") ientry,jentry,mat(ientry,jentry) - !dec$ end if + if (fit_debug > 3) then + write (out,"('mat (',i0,',',i0,')= ',es14.7)") ientry,jentry,mat(ientry,jentry) + endif ! enddo ! @@ -2139,12 +2135,12 @@ subroutine bandcentres_fitting(Jval) endif enddo ! - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then ! write (out,"(/'Smallest diag. value of mat = ',es14.7,'at k = ',i8,' upper_range = ',es14.7/)") & mat(k,k),k,job%upper_ener+mat(k,k) ! - !dec$ end if + endif ! if (allocated(energy_)) deallocate(energy_) ! @@ -2493,18 +2489,18 @@ subroutine bandcentres_fitting(Jval) do icolumn=1,irow al(irow,icolumn)=sum(rjacob(1:npts,icolumn)*rjacob(1:npts,irow)*wtall(1:npts)) al(icolumn,irow)=al(irow,icolumn) - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then write (out,"('al (',i0,',',i0,')= ',es14.7)") irow,icolumn,al(irow,icolumn) - !dec$ end if + endif enddo enddo ! ! form B matrix do irow=1,numpar bl(irow)=sum(eps(1:npts)*rjacob(1:npts,irow)*wtall(1:npts)) - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then write (out,"('bl (',i0,')= ',es14.7)") irow,bl(irow) - !dec$ end if + endif enddo ! ! Two types of the linear solver are availible: @@ -2754,9 +2750,6 @@ subroutine bandcentres_fitting(Jval) ! ! Print the potential energy points into a separate unit. ! - !dec$ if (fit_debug > 1) - !dec$ end if - ! if (job%verbose>=6) call TimerReport ! enddo ! --- fititer @@ -2879,9 +2872,9 @@ subroutine restore_vib_matrix_elements integer(hik) :: rootsize,rootsize2 - !dec$ if (fit_debug > 1) + if (fit_debug > 1) then write(out, '(/a, 1x, a)') 'read vibrational contracted matrix elements from file', trim(job%extFmat_file) - !dec$ end if + endif ! job_is ='external field contracted matrix elements for J=0' call IOStart(trim(job_is),chkptIO) @@ -2906,9 +2899,9 @@ subroutine restore_vib_matrix_elements rootsize = int(ncontr_t*(ncontr_t+1)/2,hik) rootsize2= int(ncontr_t*ncontr_t,hik) ! - !dec$ if (fit_debug > 2) + if (fit_debug > 2) then write(out,"(/'restore_vib_matrix_elements...: Number of elements: ',i8)") ncontr_t - !dec$ end if + endif ! allocate(poten_me(ncontr_t,ncontr_t,extF%rank),stat=alloc) call ArrayStart('poten_me',alloc,1,kind(poten_me),rootsize2) @@ -2934,9 +2927,9 @@ subroutine restore_vib_matrix_elements ! close(chkptIO,status='keep') - !dec$ if (fit_debug > 1) + if (fit_debug > 2) then write(out, '(/a)') 'done' - !dec$ end if + endif ! end subroutine restore_vib_matrix_elements @@ -2997,10 +2990,10 @@ subroutine Hamiltonian_vector(ientry,indJ,nentries,tmat,cdimen,poten,mat) ! !compute me ! - !dec if (fit_debug >= 3) + !#if (fit_debug >= 3) ! write (out,"('irootF,icontrF,cirootI,icontrI,irow,icol,cindtmat(ientry)%icoef = ',8i,' poten,tmat = ',2(2x,es18.9))") irootF,icontrF,cirootI,icontrI,irow,icol,cind,tmat(ientry)%icoeff(cirootI),& ! poten(cind),tmat(ientry)%coeff(cirootI) - !dec end if + !#endif ! half_matelem(irootF) = half_matelem(irootF) + poten(icontrI,icontrF)*tmat(ientry)%coeff(cirootI) ! @@ -3009,9 +3002,9 @@ subroutine Hamiltonian_vector(ientry,indJ,nentries,tmat,cdimen,poten,mat) end do !$omp end parallel do ! - !dec$ if (fit_debug > 3) + if (fit_debug > 2) then write (out,"('ientry = ',i0,'; dimen = ',i0)") ientry,dimen - !dec$ end if + endif ! !loop over final states ! @@ -3032,18 +3025,18 @@ subroutine Hamiltonian_vector(ientry,indJ,nentries,tmat,cdimen,poten,mat) ! mat(jentry) = mat(jentry) + half_matelem(tmat(jentry)%icoeff(cirootI))*tmat(jentry)%coeff(cirootI) ! - !dec if (fit_debug >= 3) + !#if (fit_debug >= 3) ! write (out,"('jentry = ',2i,'; mat,half_matelem,tmat-coef,icoef = ',3es18.8,i0)") & ! jentry,cirootI,mat(jentry),& ! half_matelem(tmat(jentry)%icoeff(cirootI)),& ! tmat(jentry)%coeff(cirootI),tmat(jentry)%icoeff(cirootI) - !dec end if + !#endif ! enddo ! - !dec if (fit_debug >= 3) + !#if (fit_debug >= 3) ! write (out,"('jentry = ',i0,'; mat(jentry) = ',es18.8)") jentry,mat(jentry) - !dec end if + !#endif ! end do Flevels_loop !$omp end parallel do @@ -3219,7 +3212,7 @@ subroutine calc_exp_values(nJ,Jval) do isym=1,sym%Nrepresen ! if (job%verbose>=2) write (out,"('jrot = ',i0,'; sym = ',i0)") Jrot,isym - !write (out,"(/'iparam # ilist matrix ')") + !write (out,"(/'iparam# ilist matrix ')") ! Nentries = fit(isym,jind)%Nentries ! @@ -3754,7 +3747,8 @@ subroutine prepare_diff_evib(nJ,Jval) ! cdimen_ = max(kdimen(ientry,icontr,ktau_i),1) ! - if (job%verbose>=6) write (out,"(' ientry = ',i0,'ktau_i = ',i4,' icontr = ',i0,' cdimen_ = ',i0)") ientry,ktau_i,icontr,cdimen_ + if (job%verbose>=6) write (out,"(' ientry = ',i0,'ktau_i = ',i4,' icontr = ',i0,' cdimen_ = ',i0)") & + ientry,ktau_i,icontr,cdimen_ ! !allocate(kmat(ientry,icontr,ktau_i)%coeff(cdimen_),stat=alloc) !if (alloc /= 0) stop 'fitting-vec allocation error: kmat%coeff - out of memory' @@ -3833,7 +3827,8 @@ subroutine prepare_diff_evib(nJ,Jval) ! !ij = ientry*(ientry-1)/2+jentry ! - !$omp parallel do private(ciroot,iroot,icontr,ktau_i,cjroot,jroot,jcontr,ktau_j) shared(deriv_matrix) schedule(guided) + !$omp parallel do private(ciroot,iroot,icontr,ktau_i,cjroot,jroot,jcontr,ktau_j) & + !$omp& shared(deriv_matrix) schedule(guided) do ciroot = 1, cdimen(ientry) ! iroot = tmat(ientry)%icoeff(ciroot) @@ -3921,7 +3916,8 @@ subroutine prepare_diff_evib(nJ,Jval) ! !ij = ientry*(ientry-1)/2+jentry ! - !$omp parallel do private(ciroot,iroot,icontr,icase_j0,ktau_i,cjroot,jroot,jcontr,ktau_j) shared(deriv_matrix) schedule(guided) + !$omp parallel do private(ciroot,iroot,icontr,icase_j0,ktau_i,cjroot,jroot,jcontr,ktau_j) & + !$omp& shared(deriv_matrix) schedule(guided) l_iroot: do ciroot = 1, cdimen(ientry) ! iroot = tmat(ientry)%icoeff(ciroot) diff --git a/symmetry.f90 b/symmetry.f90 index 47c3271..743d3a2 100644 --- a/symmetry.f90 +++ b/symmetry.f90 @@ -9,7 +9,7 @@ module symmetry type ScIIT integer(ik) :: Noper ! Number of operations in the CII operator integer(ik) :: Nzeta ! Number of symmetric elements taking into account the degeneracies - real(ark),pointer :: ioper(:) ! the operation number in the MS group + integer(ik),pointer :: ioper(:) ! the operation number in the MS group integer(ik),pointer :: coeff(:) ! coefficients of the CII operator integer(ik),pointer :: izeta(:) ! symmetry indentification as a eigenvalues of the CII operator end type ScIIT diff --git a/tran.f90 b/tran.f90 index 421c519..27bcff8 100644 --- a/tran.f90 +++ b/tran.f90 @@ -3,8 +3,8 @@ ! in terms of the contracted basis state representaion. ! module tran - -!dec define tran_debug = 1 ! set tran_debug > 2 with small vibrational bases and small expansions only +! set tran_debug > 2 with small vibrational bases and small expansions only +!#define tran_debug 1 use accuracy, only : ik, rk, hik, ark, cl, out, small_ use timer, only : IOstart,IOstop,arraystart,arraystop,arrayminus,Timerstart,Timerstop,TimerReport,MemoryReport From 146a947d351a19cdd859ddb8cc00a5754511febe Mon Sep 17 00:00:00 2001 From: IBeArjen Date: Tue, 5 Feb 2019 17:18:32 +0000 Subject: [PATCH 03/79] gfortran-compatible MPI --- coarray_aux.f90 | 375 +++++++++++++++++ fields.f90 | 2 +- makefile | 6 +- perturbation.f90 | 1017 ++++++++++++++++++++++++++-------------------- trove.f90 | 9 +- 5 files changed, 953 insertions(+), 456 deletions(-) create mode 100644 coarray_aux.f90 diff --git a/coarray_aux.f90 b/coarray_aux.f90 new file mode 100644 index 0000000..1a95dba --- /dev/null +++ b/coarray_aux.f90 @@ -0,0 +1,375 @@ +module coarray_aux + use mpi_f08 + use timer + implicit none + + public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr + public co_create_type + + public send_or_recv, comm_size, proc_rank + public co_startdim, co_enddim + + interface co_sum + module procedure :: co_sum_double + end interface + + !interface co_max + ! module procedure :: co_max_double + !end interface + + interface co_gather + module procedure :: co_gather_double + module procedure :: co_gatherv_double + end interface + + integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv + integer :: comm_size, proc_rank + integer :: co_startdim, co_enddim + logical :: comms_inited = .false., distr_inited=.false. + type(MPI_Datatype) :: mpitype_column + +contains + + subroutine co_sum_double(x, result_image) + real*8, intent(inout), dimension(:,:) :: x + integer, optional :: result_image + integer :: i + !integer, save :: result_image_mpi[*] + + call TimerStart('co_sum_double') + + !if (present(result_image)) then + + !if (this_image() .eq. 1) then + ! call mpi_comm_rank(mpi_comm_world, result_image_mpi) + ! do i = 2, num_images() + ! result_image_mpi[i] = result_image_mpi + ! end do + !end if + !sync all + + if (proc_rank .eq. 0) then + call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + else + call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + endif + !else + ! call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, mpi_comm_world) + !end if + call TimerStop('co_sum_double') + end subroutine + + !subroutine co_max_double(x, result_image) + ! real*8, intent(inout), dimension(:,:) :: x + ! integer, optional :: result_image + ! integer :: i + ! integer, save :: result_image_mpi[*] + + ! call TimerStart('co_max_double') + + ! if (present(result_image)) then + + ! if (this_image() .eq. 1) then + ! call mpi_comm_rank(mpi_comm_world, result_image_mpi) + ! do i = 2, num_images() + ! result_image_mpi[i] = result_image_mpi + ! end do + ! end if + ! sync all + + ! if (this_image() .eq. 1) then + ! call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_max, result_image_mpi, mpi_comm_world) + ! else + ! call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_max, result_image_mpi, mpi_comm_world) + ! endif + ! else + ! call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_max, mpi_comm_world) + ! end if + ! call TimerStop('co_max_double') + !end subroutine + + subroutine co_gather_double(x, static) + real*8, intent(inout), dimension(:,:) :: x + logical, intent(in) :: static + integer :: ierr + + if (.not. comms_inited .or. .not. distr_inited) stop "COMMS NOT INITIALISED" + if (comm_size .eq. 1) return + + call TimerStart('CO_GATHER_DOUBLE') + call mpi_gather(x, 0, mpi_double_precision, x, proc_sizes(2), mpi_double_precision, 0, mpi_comm_world) + if (ierr .gt. 0) stop "co_gather_double" + call TimerStop('CO_GATHER_DOUBLE') + + end subroutine co_gather_double + + subroutine co_gatherv_double(x) + real*8, intent(inout), dimension(:,:) :: x + integer :: ierr + + if (.not. comms_inited .or. .not. distr_inited) stop "COMMS NOT INITIALISED" + if (comm_size .eq. 1) return + + !write(*,*) proc_rank, proc_sizes + + call TimerStart('CO_GATHERV_DOUBLE') + if (proc_rank.eq.0) then + call mpi_gatherv(x, 0, mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) + else + call mpi_gatherv(x, size(x), mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) + endif + if (ierr .gt. 0) stop "co_gatherv_double" + call TimerStop('CO_GATHERV_DOUBLE') + + end subroutine co_gatherv_double + + + subroutine co_init_comms() + integer :: ierr + + call mpi_init(ierr) + if (ierr .gt. 0) stop "MPI_INIT" + call mpi_comm_size(mpi_comm_world, comm_size, ierr) + if (ierr .gt. 0) stop "MPI_COMM_SIZE" + call mpi_comm_rank(mpi_comm_world, proc_rank, ierr) + if (ierr .gt. 0) stop "MPI_COMM_RANK" + + comms_inited = .true. + + end subroutine co_init_comms + + subroutine co_finalize_comms() + integer :: ierr + + if (.not. comms_inited) stop "CO_FINALIZE_COMMS COMMS NOT INITED" + + call mpi_finalize(ierr) + + if (ierr .gt. 0) stop "MPI_FINALIZE" + end subroutine co_finalize_comms + + subroutine co_init_distr(dimen, startdim, enddim, blocksize) + integer,intent(in) :: dimen + integer,intent(out) :: startdim, enddim, blocksize + integer,dimension(:),allocatable :: starts, ends + integer :: localsize, proc_index, localsize_ + integer :: i, ierr, to_calc + + if (.not. comms_inited) stop "COMMS NOT INITIALISED" + if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" + + proc_index = proc_rank+1 + + ! While co-arraying + !if (this_image().ne.proc_index) stop "coarray/mpi mixup" + + allocate(proc_sizes(comm_size),proc_offsets(comm_size),send_or_recv(comm_size),starts(comm_size),ends(comm_size),stat=ierr) + if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" + + if (comm_size .eq. 1) then + startdim = 1 + enddim = dimen + blocksize = dimen*dimen + send_or_recv(1) = 0 + else + + if (proc_rank .eq. 0) then !root + + localsize = dimen/comm_size + localsize_ = int(1+real(dimen/comm_size)) + + starts(1) = 1 + ends(1) = localsize_ + proc_sizes(1) = localsize_*(comm_size*localsize_) + proc_offsets(1) = 0 + + do i=2,comm_size-1 + starts(i) = (i-1)*localsize_+1 + ends(i) = i*localsize_ + proc_sizes(i) = localsize_ * (comm_size*localsize_)!dimen + proc_offsets(i) = localsize_*(i-1)*(comm_size*localsize_)!dimen + end do + + starts(comm_size) = (i-1) * localsize_ + 1 + ends(comm_size) = dimen!comm_size*localsize_!dimen + !proc_sizes(comm_size) = (dimen-localsize*(comm_size-1)) * dimen + proc_sizes(comm_size) = localsize_*comm_size*localsize_!dimen + + proc_offsets(comm_size) = (comm_size-1)*localsize_*(comm_size*localsize_)!dimen + endif + + call mpi_bcast(starts, comm_size, mpi_integer, 0, mpi_comm_world) + call mpi_bcast(ends, comm_size, mpi_integer, 0, mpi_comm_world) + call mpi_bcast(proc_sizes, comm_size, mpi_integer, 0, mpi_comm_world) + call mpi_bcast(proc_offsets, comm_size, mpi_integer, 0, mpi_comm_world) + + + !if (proc_rank.eq.0) write(*,*) "PROC_SIZES:", proc_sizes + + blocksize = proc_sizes(proc_index) + startdim = starts(proc_index) + enddim = ends(proc_index) + + co_startdim = startdim + co_enddim = enddim + + !if (mod(comm_size,2)) then + ! do i=1,comm_size + ! if (i.eq.proc_index) then + ! send_or_recv(i) = 0 + ! !elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & + ! elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & + ! ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then + ! send_or_recv(i) = 1 ! send + ! else + ! send_or_recv(i) = -1 ! recv + ! endif + ! end do + !else + ! do i=1,comm_size + ! if (mod(i,2)) then + ! to_calc = comm_size/2+1 + ! else + ! to_calc = comm_size/2 + ! endif + + ! if (i.eq.proc_index) then + ! send_or_recv(i) = 0 + ! !elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & + ! elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & + ! ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then + ! send_or_recv(i) = 1 ! send + ! else + ! send_or_recv(i) = -1 ! recv + ! endif + ! end do + !endif + + do i=1,comm_size + if (mod(comm_size,2).eq.1) then + to_calc = comm_size/2+1 + else + if ((mod(comm_size, 4).eq.0) .and. ((mod(i,2).eq.1 .and. i.le.comm_size/2).or.(mod(i,2).eq.0 .and. i.gt.comm_size/2))) then + to_calc = comm_size/2+1 + elseif (mod(i,2).eq.1 .and. (mod(comm_size, 4).gt.0)) then + to_calc = comm_size/2+1 + else + to_calc = comm_size/2 + endif + endif + + !if (proc_rank .eq. 0) write(*,*) "TOCALC:", i, to_calc, comm_size, mod(comm_size,4) + + if (i.eq.proc_index) then + send_or_recv(i) = 0 + !!!!!!!!elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & + elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & + ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then + send_or_recv(i) = 1 ! send + !elseif (((i.gt.proc_index .and. i.lt.(proc_index+to_calc))) .or. (proc_index+to_calc.gt.comm_size .and. i.lt.mod(proc_index+to_calc,comm_size))) then + send_or_recv(i) = 1 ! send + else + send_or_recv(i) = -1 ! recv + endif + end do + + !write(*,*) "SENDRECV:", proc_index, send_or_recv + + + endif + + deallocate(starts,ends) + + distr_inited = .true. + end subroutine co_init_distr + + subroutine co_distr_data(x, tmp, blocksize, lb, ub) + use accuracy + + + real(rk),dimension(:,lb:),intent(inout) :: x + real(rk),dimension(:,:,:),intent(inout) :: tmp + integer,intent(in) :: blocksize, lb, ub + + integer :: i, icoeff, jcoeff, offset, ierr + type(MPI_Request) :: reqs(comm_size) + + + !!!write(*,*) "DISTR1", proc_rank, send_or_recv + !!!write(*,*) "DISTR2", proc_rank, blocksize, lb, ub + !!!write(*,*) "DISTR3", proc_rank, shape(x), shape(tmp) + call TimerStart('MPI_transpose') + call TimerStart('MPI_transpose_sendrecv') + + do i=1,comm_size + if (send_or_recv(i).eq.1) then + call mpi_isend(x(((i-1)*blocksize)+1:i*blocksize,:),blocksize*blocksize,mpi_double_precision,i-1,0,mpi_comm_world,reqs(i),ierr) + elseif (send_or_recv(i).eq.-1) then + call mpi_irecv(tmp(:,:,i),blocksize*blocksize,mpi_double_precision,i-1,mpi_any_tag,mpi_comm_world,reqs(i),ierr) + else + reqs(i) = MPI_REQUEST_NULL + endif + enddo + + call mpi_waitall(comm_size,reqs,mpi_statuses_ignore,ierr) + call TimerStop('MPI_transpose_sendrecv') + call TimerStart('MPI_transpose_local') + + do i=1,comm_size + if (send_or_recv(i).eq.-1) then + offset = (i-1)*blocksize + !$omp parallel do private(icoeff,jcoeff) shared(i,x) schedule(static) + do icoeff=lb,ub + do jcoeff=offset+1,offset+blocksize + x(jcoeff,icoeff) = tmp(icoeff-lb+1,jcoeff-offset,i) + enddo + enddo + !$omp end parallel do + endif + enddo + call TimerStop('MPI_transpose_local') + call TimerStop('MPI_transpose') + + end subroutine co_distr_data + + subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) + use accuracy + + real(rk),dimension(:,lb:),intent(in) :: x + integer,intent(in) :: longdim, lb, ub + type(MPI_File),intent(in) :: outfile + integer :: ierr, mpi_real_size, writecount + integer(kind=MPI_Offset_kind) :: mpioffset,mpi_write_offsetkind + type(MPI_Status) :: writestat + + + call TimerStart('MPI_write') + + !if (proc_rank .eq. 0) then + call mpi_file_get_size(outfile,mpioffset,ierr) + !endif + + !call mpi_bcast(mpioffset,1,mpi_integer,0,mpi_comm_world,ierr) + + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + + mpioffset = mpioffset + proc_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) + + writecount = int(1+real(longdim/comm_size)) + mpi_write_offsetkind = int(1+real(longdim/comm_size),MPI_Offset_kind) + call MPI_File_write_at_all(outfile,mpioffset,x,writecount,mpitype_column,writestat,ierr) + + call TimerStop('MPI_write') + + end subroutine co_write_matrix_distr + + subroutine co_create_type(extent) + integer, intent(in) :: extent + integer :: ierr + + call MPI_Type_contiguous(extent, mpi_double_precision, mpitype_column, ierr) + call MPI_Type_commit(mpitype_column, ierr) + + end subroutine co_create_type + +end module diff --git a/fields.f90 b/fields.f90 index d16912f..33576ff 100644 --- a/fields.f90 +++ b/fields.f90 @@ -15577,7 +15577,7 @@ subroutine fingerprintRead ! !read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,3i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i,i)") imode_,bs_ ! - read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i2,1x,i2,1x,a10,i9,i2,i2)") & + read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,l,1x,i2,1x,a10,i9,i2,i2)") & imode_,bs_%type,bs_%COORD_KINET,bs_%COORD_POTEN,bs_%MODEL,bs_%DIM,bs_%SPECIES,bs_%CLASS,bs_%RANGE,& bs_%RES_COEFFS,bs_%NPOINTS,bs_%BORDERS,bs_%PERIODIC,bs_%IPERIOD ! diff --git a/makefile b/makefile index 05dd386..f439d62 100644 --- a/makefile +++ b/makefile @@ -32,11 +32,11 @@ LIB = $(LAPACK) trove.x: trove.o accuracy.o perturbation.o fields.o symmetry.o molecules.o me_numer.o me_str.o me_bnd.o me_rot.o \ lapack.o plasma.o moltype.o refinement.o dipole.o refinement.o tran.o diag.o timer.o input.o \ mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ - pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o $(pot_user).o + pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o coarray_aux.o $(pot_user).o $(FOR) $(FFLAGS) -o j-trove$(PLAT).x $^ $(LIB) trove.o: accuracy.o fields.o perturbation.o symmetry.o timer.o moltype.o dipole.o refinement.o tran.o -perturbation.o: accuracy.o molecules.o lapack.o fields.o timer.o symmetry.o diag.o plasma.o +perturbation.o: accuracy.o molecules.o lapack.o fields.o timer.o symmetry.o diag.o plasma.o coarray_aux.o fields.o: accuracy.o molecules.o lapack.o me_str.o timer.o me_numer.o input.o me_rot.o moltype.o symmetry.o me_bnd.o symmetry.o: accuracy.o molecules.o: accuracy.o moltype.o mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ @@ -82,5 +82,7 @@ pot_c2h6.o: accuracy.o moltype.o pot_c3h6.o: accuracy.o moltype.o pot_abcd.o: accuracy.o moltype.o lapack.o +coarray_aux.o: timer.o + clean: rm -f *.mod *.o diff --git a/perturbation.f90 b/perturbation.f90 index 9e5c415..1c6080e 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6508,6 +6508,7 @@ end subroutine PTsymmetrization ! Here we construct the Hamiltonian matrix in the contracted basis set representation ! subroutine PThamiltonian_contract(jrot) + use coarray_aux integer(ik),intent(in) :: jrot ! rotational quantum number ! @@ -6522,7 +6523,7 @@ subroutine PThamiltonian_contract(jrot) integer(ik),allocatable :: ijterm(:,:),k_row(:,:),bterm(:,:) ! real(rk) :: zpe - integer :: slevel,dimen_s,max_dim,iterm,jterm,total_roots,icontr + integer :: slevel,dimen_s,max_dim,iterm,jterm,total_roots,icontr,ierr ! integer(ik) :: iunit,unitO,unitC,rec_len,irec_len,chkptIO integer(ik) :: ncontr,maxcontr,maxcontr0 @@ -6568,7 +6569,7 @@ subroutine PThamiltonian_contract(jrot) ! Prepare the storing information if necessary: ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then - call check_point_active_space(job%IOeigen_action) + if (proc_rank.eq.0) call check_point_active_space(job%IOeigen_action) endif ! ! obtain zpe @@ -6694,7 +6695,7 @@ subroutine PThamiltonian_contract(jrot) ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then ! - call check_point_active_space('CLOSE') + if (proc_rank.eq.0) call check_point_active_space('CLOSE') ! endif ! @@ -7272,211 +7273,117 @@ subroutine PThamiltonian_contract(jrot) ! endif ! + do isym = 1,sym%Nrepresen + call co_sum(smat(isym)%coeffs,0) + enddo call TimerStop('Calculating the Hamiltonian matrix') ! if (job%verbose>=4) write(out,"('...done!')") - ! Correction for the case we do not compute the vibrational part of the - ! Hamiltonian: - ! - if (.not.PTvibrational_me_calc) then - ! - if (job%verbose>=3) write(out,"('Diagonal correction...')") + if (proc_rank.eq.0) then!mpiio + ! Correction for the case we do not compute the vibrational part of the + ! Hamiltonian: ! - do isym = 1,sym%Nrepresen + if (.not.PTvibrational_me_calc) then ! - if (.not.job%select_gamma(isym)) cycle + if (job%verbose>=3) write(out,"('Diagonal correction...')") ! - !$omp parallel do private(ielem,icase,slevel,k_i,iterm) - do ielem = 1,Nterms(isym) - ! - icase = PT%symactive_space(isym)%sym_N(ielem,1) - slevel = PT%contractive_space(1,icase) - ! - k_i = k_row(isym,ielem) - ! - iterm = ielem - kblock(isym,max(k_i-2,0),1) + 1 - ! - smat(isym)%coeffs(ielem,iterm) = smat(isym)%coeffs(ielem,iterm) + & - contr(1)%eigen(slevel)%value + do isym = 1,sym%Nrepresen + ! + if (.not.job%select_gamma(isym)) cycle + ! + !$omp parallel do private(ielem,icase,slevel,k_i,iterm) + do ielem = 1,Nterms(isym) + ! + icase = PT%symactive_space(isym)%sym_N(ielem,1) + slevel = PT%contractive_space(1,icase) + ! + k_i = k_row(isym,ielem) + ! + iterm = ielem - kblock(isym,max(k_i-2,0),1) + 1 + ! + smat(isym)%coeffs(ielem,iterm) = smat(isym)%coeffs(ielem,iterm) + & + contr(1)%eigen(slevel)%value + enddo + !$omp end parallel do + ! enddo - !$omp end parallel do ! - enddo - ! - endif - ! - ! Loop over all symmetries and diagonalize or save to disk the Hamiltonian matrices. - ! For diagonalization, the smat-object is copied to the real matrix a. - ! For storing only the lower part is saved. - ! - rlevel = PTNlevels - total_roots = PTNroots - zpe = 0 - ! - do isym = 1,sym%Nrepresen - ! - if (.not.job%select_gamma(isym)) cycle + endif ! - dimen_s = PT%Max_sym_levels(isym) + ! Loop over all symmetries and diagonalize or save to disk the Hamiltonian matrices. + ! For diagonalization, the smat-object is copied to the real matrix a. + ! For storing only the lower part is saved. ! - allocate (bterm(dimen_s,2),stat=alloc) - call ArrayStart('PThamiltonian_contract:bterm',alloc,size(bterm),kind(bterm)) + rlevel = PTNlevels + total_roots = PTNroots + zpe = 0 ! - if (job%sparse) then + do isym = 1,sym%Nrepresen ! - dimen_maxrow = 1 - do k_i = 0,jrot - dimen_row = kblock(isym,min(k_i+2,jrot),2)-kblock(isym,max(k_i-2,0),1)+1 - dimen_maxrow = max(dimen_maxrow,dimen_row) - if (job%verbose>=6) write(out,"('k_i = ',i9,'; dimen_row = ',i9)") k_i,dimen_row - enddo + if (.not.job%select_gamma(isym)) cycle ! - !if (job%nroots(isym)/=1000000) nroots = min(job%nroots(isym),dimen_s) + dimen_s = PT%Max_sym_levels(isym) ! - nroots = min(job%nroots(isym),dimen_s) - if (job%nroots(isym)==0.or.job%nroots(isym)==1000000) nroots=dimen_maxrow - ! - dimen_maxrow = max(dimen_maxrow,nroots) + allocate (bterm(dimen_s,2),stat=alloc) + call ArrayStart('PThamiltonian_contract:bterm',alloc,size(bterm),kind(bterm)) ! - if (only_store.or.no_diagonalization) then - ! - allocate (a(dimen_s,1),stat=alloc) - call ArrayStart('PThamiltonian_contract:a',alloc,size(a),kind(a)) - ! - else + if (job%sparse) then ! - matsize = int(dimen_s,hik)*int(dimen_maxrow,hik) - if (job%verbose>=4) write(out,"('Allocate array a (sparse)',i8,'x',i8,' = ',i9)") dimen_s,dimen_maxrow,matsize - allocate (a(dimen_s,dimen_maxrow),stat=alloc) + dimen_maxrow = 1 + do k_i = 0,jrot + dimen_row = kblock(isym,min(k_i+2,jrot),2)-kblock(isym,max(k_i-2,0),1)+1 + dimen_maxrow = max(dimen_maxrow,dimen_row) + if (job%verbose>=6) write(out,"('k_i = ',i9,'; dimen_row = ',i9)") k_i,dimen_row + enddo ! - call ArrayStart('PThamiltonian_contract:a',alloc,1,kind(a),matsize) + !if (job%nroots(isym)/=1000000) nroots = min(job%nroots(isym),dimen_s) ! - endif - ! - else - ! - matsize = 1 - ! - if (only_store.or.no_diagonalization) then + nroots = min(job%nroots(isym),dimen_s) + if (job%nroots(isym)==0.or.job%nroots(isym)==1000000) nroots=dimen_maxrow + ! + dimen_maxrow = max(dimen_maxrow,nroots) ! - allocate (a(dimen_s,1),stat=alloc) - call ArrayStart('PThamiltonian_contract:a',alloc,size(a),kind(a)) + if (only_store.or.no_diagonalization) then + ! + allocate (a(dimen_s,1),stat=alloc) + call ArrayStart('PThamiltonian_contract:a',alloc,size(a),kind(a)) + ! + else + ! + matsize = int(dimen_s,hik)*int(dimen_maxrow,hik) + if (job%verbose>=4) write(out,"('Allocate array a (sparse)',i8,'x',i8,' = ',i9)") dimen_s,dimen_maxrow,matsize + allocate (a(dimen_s,dimen_maxrow),stat=alloc) + ! + call ArrayStart('PThamiltonian_contract:a',alloc,1,kind(a),matsize) + ! + endif ! - elseif (.not.only_store) then + else ! - matsize = int(dimen_s,hik)*int(dimen_s,hik) - if (job%verbose>=4) write(out,"('Allocate array a',i8,'x',i8,' = ',i9)") dimen_s,dimen_s,matsize - allocate (a(dimen_s,dimen_s),stat=alloc) + matsize = 1 ! - call ArrayStart('PThamiltonian_contract:a',alloc,1,kind(a),matsize) + if (only_store.or.no_diagonalization) then + ! + allocate (a(dimen_s,1),stat=alloc) + call ArrayStart('PThamiltonian_contract:a',alloc,size(a),kind(a)) + ! + elseif (.not.only_store) then + ! + matsize = int(dimen_s,hik)*int(dimen_s,hik) + if (job%verbose>=4) write(out,"('Allocate array a',i8,'x',i8,' = ',i9)") dimen_s,dimen_s,matsize + allocate (a(dimen_s,dimen_s),stat=alloc) + ! + call ArrayStart('PThamiltonian_contract:a',alloc,1,kind(a),matsize) + ! + endif ! endif ! - endif - ! - if (job%verbose>=4) call MemoryReport - ! - if (job%verbose>=4) write(out,"(/' Prepare the sparse representation address-array...')") - ! - !$omp parallel do private(ielem,icase,slevel,k_i,dimen_row,istart,iend) shared(bterm) - do ielem = 1,Nterms(isym) - ! - icase = PT%symactive_space(isym)%sym_N(ielem,1) - slevel = PT%contractive_space(1,icase) - ! - k_i = k_row(isym,ielem) - ! - dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 - ! - bterm(ielem,1) = kblock(isym,max(k_i-2, 0),1) - bterm(ielem,2) = kblock(isym,min(k_i+2,jrot),2) - ! - enddo - !$omp end parallel do - ! - if (job%verbose>=4) write(out,"(/' ...done!')") - ! - ! - ! prepare a unit for saving the matrix to the disk, if required - ! - if (only_store) then - ! - write(unitfname,"('matrix for j = ',i6,' sym = ',i4)") jrot,isym - ! - call IOStart(trim(unitfname),chkptIO) - ! - write(jchar, '(i4)') jrot - write(symchar, '(i4)') isym - filename = trim(job%matrix_file)//trim(adjustl(jchar))//'_'//trim(adjustl(symchar))//'.chk' - ! - open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=filename) - ! - write(chkptIO) dimen_s - ! - write(chkptIO) 'Lower' - if (job%sparse) then - write(chkptIO) 'spar' - write(chkptIO) 'Start bterm' - write(chkptIO) bterm - else - write(chkptIO) 'full' - endif - ! - write(chkptIO) 'Start matrix' - ! - do ielem = 1,dimen_s - ! - if (job%sparse) then - ! - dimen_row = ielem-bterm(ielem,1)+1 - ! - a(1:dimen_row,1) = smat(isym)%coeffs(ielem,1:dimen_row) - ! - do jelem = ielem+1,bterm(ielem,2) - ! - k_j = jelem - bterm(ielem,1) + 1 - k_i = ielem - bterm(jelem,1) + 1 - ! - a(k_j,1) = smat(isym)%coeffs(jelem,k_i) - ! - enddo - ! - dimen_row = bterm(ielem,2)-bterm(ielem,1)+1 - ! - write(chkptIO) a(1:dimen_row,1) - ! - else - ! - a = 0 - ! - icase = PT%symactive_space(isym)%sym_N(ielem,1) - slevel = PT%contractive_space(1,icase) - ! - k_i = k_row(isym,ielem) - ! - dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 - ! - istart = kblock(isym,max(k_i-2,0),1) - iend = kblock(isym,k_i,2) - ! - a(istart:iend,1) = smat(isym)%coeffs(ielem,1:dimen_row) - ! - write(chkptIO) a - ! - endif - ! - enddo - ! - nroots = 0 - ! - write(chkptIO) 'End matrix' - close(chkptIO,status='keep') - ! - deallocate (smat(isym)%coeffs) - call Arraystop('PThamiltonian_contract:smat'//sym%label(isym)) + if (job%verbose>=4) call MemoryReport ! - elseif (no_diagonalization) then + if (job%verbose>=4) write(out,"(/' Prepare the sparse representation address-array...')") ! + !$omp parallel do private(ielem,icase,slevel,k_i,dimen_row,istart,iend) shared(bterm) do ielem = 1,Nterms(isym) ! icase = PT%symactive_space(isym)%sym_N(ielem,1) @@ -7486,131 +7393,230 @@ subroutine PThamiltonian_contract(jrot) ! dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 ! - if (job%sparse) then - ! - dimen_row = ielem-bterm(ielem,1)+1 - ! - a(ielem,1) = smat(isym)%coeffs(ielem,dimen_row) - ! - else - ! - istart = kblock(isym,max(k_i-2,0),1) - iend = ielem-istart+1 - ! - a(ielem,1) = smat(isym)%coeffs(ielem,iend) - ! - endif + bterm(ielem,1) = kblock(isym,max(k_i-2, 0),1) + bterm(ielem,2) = kblock(isym,min(k_i+2,jrot),2) ! enddo + !$omp end parallel do ! - !do ielem = 1,dimen_s - ! ! - ! if (job%sparse) then - ! ! - ! dimen_row = ielem-bterm(ielem,1)+1 - ! ! - ! a(ielem,1) = smat(isym)%coeffs(ielem,dimen_row) - ! ! - ! else - ! ! - ! a(ielem,1) = smat(isym)%coeffs(ielem,ielem) - ! ! - ! endif - ! ! - !enddo - ! - call diagonalization_contract(jrot,isym,dimen_s,a,zpe,rlevel,total_roots,bterm,k_row(isym,1:dimen_s)) - ! - else ! diagonalize + if (job%verbose>=4) write(out,"(/' ...done!')") ! - a = 0 ! - if (job%verbose>=3) write(out,"(/'Prepare the lower part of the matrix...')") + ! prepare a unit for saving the matrix to the disk, if required ! - !$omp parallel do private(ielem,icase,slevel,k_i,dimen_row,istart,iend) shared(bterm) - do ielem = 1,Nterms(isym) - ! - icase = PT%symactive_space(isym)%sym_N(ielem,1) - slevel = PT%contractive_space(1,icase) - ! - k_i = k_row(isym,ielem) - ! - dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 - ! - if (job%sparse) then + if (only_store) then + ! + write(unitfname,"('matrix for j = ',i6,' sym = ',i4)") jrot,isym + ! + call IOStart(trim(unitfname),chkptIO) + ! + write(jchar, '(i4)') jrot + write(symchar, '(i4)') isym + filename = trim(job%matrix_file)//trim(adjustl(jchar))//'_'//trim(adjustl(symchar))//'.chk' + ! + open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=filename) + ! + write(chkptIO) dimen_s + ! + write(chkptIO) 'Lower' + if (job%sparse) then + write(chkptIO) 'spar' + write(chkptIO) 'Start bterm' + write(chkptIO) bterm + else + write(chkptIO) 'full' + endif + ! + write(chkptIO) 'Start matrix' + ! + do ielem = 1,dimen_s ! - a(ielem,1:dimen_row) = smat(isym)%coeffs(ielem,1:dimen_row) + if (job%sparse) then + ! + dimen_row = ielem-bterm(ielem,1)+1 + ! + a(1:dimen_row,1) = smat(isym)%coeffs(ielem,1:dimen_row) + ! + do jelem = ielem+1,bterm(ielem,2) + ! + k_j = jelem - bterm(ielem,1) + 1 + k_i = ielem - bterm(jelem,1) + 1 + ! + a(k_j,1) = smat(isym)%coeffs(jelem,k_i) + ! + enddo + ! + dimen_row = bterm(ielem,2)-bterm(ielem,1)+1 + ! + write(chkptIO) a(1:dimen_row,1) + ! + else + ! + a = 0 + ! + icase = PT%symactive_space(isym)%sym_N(ielem,1) + slevel = PT%contractive_space(1,icase) + ! + k_i = k_row(isym,ielem) + ! + dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 + ! + istart = kblock(isym,max(k_i-2,0),1) + iend = kblock(isym,k_i,2) + ! + a(istart:iend,1) = smat(isym)%coeffs(ielem,1:dimen_row) + ! + write(chkptIO) a + ! + endif ! - else + enddo + ! + nroots = 0 + ! + write(chkptIO) 'End matrix' + close(chkptIO,status='keep') + ! + deallocate (smat(isym)%coeffs) + call Arraystop('PThamiltonian_contract:smat'//sym%label(isym)) + ! + elseif (no_diagonalization) then + ! + do ielem = 1,Nterms(isym) + ! + icase = PT%symactive_space(isym)%sym_N(ielem,1) + slevel = PT%contractive_space(1,icase) ! - istart = kblock(isym,max(k_i-2,0),1) - iend = kblock(isym,k_i,2) + k_i = k_row(isym,ielem) ! - a(ielem,istart:iend) = smat(isym)%coeffs(ielem,1:dimen_row) + dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 ! - endif - ! - enddo - !$omp end parallel do - ! - if (job%verbose>=3) write(out,"('...and the upper part...')") - ! - !$omp parallel do private(ielem,k_i,istart,jelem,k_j,jstart,iterm,jterm) shared(a) schedule(dynamic) - do ielem = 1,Nterms(isym) - ! - if (job%sparse) then + if (job%sparse) then + ! + dimen_row = ielem-bterm(ielem,1)+1 + ! + a(ielem,1) = smat(isym)%coeffs(ielem,dimen_row) + ! + else + ! + istart = kblock(isym,max(k_i-2,0),1) + iend = ielem-istart+1 + ! + a(ielem,1) = smat(isym)%coeffs(ielem,iend) + ! + endif + ! + enddo + ! + !do ielem = 1,dimen_s + ! ! + ! if (job%sparse) then + ! ! + ! dimen_row = ielem-bterm(ielem,1)+1 + ! ! + ! a(ielem,1) = smat(isym)%coeffs(ielem,dimen_row) + ! ! + ! else + ! ! + ! a(ielem,1) = smat(isym)%coeffs(ielem,ielem) + ! ! + ! endif + ! ! + !enddo + ! + call diagonalization_contract(jrot,isym,dimen_s,a,zpe,rlevel,total_roots,bterm,k_row(isym,1:dimen_s)) + ! + else ! diagonalize + ! + a = 0 + ! + if (job%verbose>=3) write(out,"(/'Prepare the lower part of the matrix...')") + ! + !$omp parallel do private(ielem,icase,slevel,k_i,dimen_row,istart,iend) shared(bterm) + do ielem = 1,Nterms(isym) + ! + icase = PT%symactive_space(isym)%sym_N(ielem,1) + slevel = PT%contractive_space(1,icase) ! k_i = k_row(isym,ielem) - istart = kblock(isym,max(k_i-2,0),1) ! - do jelem = ielem+1,kblock(isym,min(k_i+2,jrot),2) - k_j = k_row(isym,jelem) - jstart = kblock(isym,max(k_j-2,0),1) + dimen_row = kblock(isym,k_i,2)-kblock(isym,max(k_i-2,0),1)+1 + ! + if (job%sparse) then + ! + a(ielem,1:dimen_row) = smat(isym)%coeffs(ielem,1:dimen_row) ! - iterm = ielem-jstart+1 - jterm = jelem-istart+1 + else ! - a(ielem,jterm) = a(jelem,iterm) + istart = kblock(isym,max(k_i-2,0),1) + iend = kblock(isym,k_i,2) ! - enddo + a(ielem,istart:iend) = smat(isym)%coeffs(ielem,1:dimen_row) + ! + endif ! - else + enddo + !$omp end parallel do + ! + if (job%verbose>=3) write(out,"('...and the upper part...')") + ! + !$omp parallel do private(ielem,k_i,istart,jelem,k_j,jstart,iterm,jterm) shared(a) schedule(dynamic) + do ielem = 1,Nterms(isym) ! - a(1:ielem-1,ielem) = a(ielem,1:ielem-1) + if (job%sparse) then + ! + k_i = k_row(isym,ielem) + istart = kblock(isym,max(k_i-2,0),1) + ! + do jelem = ielem+1,kblock(isym,min(k_i+2,jrot),2) + k_j = k_row(isym,jelem) + jstart = kblock(isym,max(k_j-2,0),1) + ! + iterm = ielem-jstart+1 + jterm = jelem-istart+1 + ! + a(ielem,jterm) = a(jelem,iterm) + ! + enddo + ! + else + ! + a(1:ielem-1,ielem) = a(ielem,1:ielem-1) + ! + endif ! - endif - ! - enddo - !$omp end parallel do - ! - ! Diagonalization + enddo + !$omp end parallel do + ! + ! Diagonalization + ! + deallocate (smat(isym)%coeffs) + call Arraystop('PThamiltonian_contract:smat'//sym%label(isym)) + ! + if (job%verbose>=3) write(out,"('Diagonalization...')") + ! + if (job%verbose>=1) then + write (out,"(//'Size of the symmetrized hamiltonian = ',i7,' Symmetry = ',a4)") dimen_s,sym%label(isym) + ! write (out,"(/'Variation solution (symmetrized):',/' Gamma i value j k t quanta')") + endif + ! + call diagonalization_contract(jrot,isym,dimen_s,a,zpe,rlevel,total_roots,bterm,k_row(isym,1:dimen_s)) + ! + endif ! store vs diagonalize ! - deallocate (smat(isym)%coeffs) - call Arraystop('PThamiltonian_contract:smat'//sym%label(isym)) + deallocate (bterm) + call ArrayStop('PThamiltonian_contract:bterm') ! - if (job%verbose>=3) write(out,"('Diagonalization...')") + !deallocate (smat(isym)%coeffs) ! - if (job%verbose>=1) then - write (out,"(//'Size of the symmetrized hamiltonian = ',i7,' Symmetry = ',a4)") dimen_s,sym%label(isym) - ! write (out,"(/'Variation solution (symmetrized):',/' Gamma i value j k t quanta')") + !if (.not.only_store.and..not.sparse) then + if (allocated(a)) then + call ArrayStop('PThamiltonian_contract:a') + deallocate (a) endif ! - call diagonalization_contract(jrot,isym,dimen_s,a,zpe,rlevel,total_roots,bterm,k_row(isym,1:dimen_s)) - ! - endif ! store vs diagonalize - ! - deallocate (bterm) - call ArrayStop('PThamiltonian_contract:bterm') - ! - !deallocate (smat(isym)%coeffs) - ! - !if (.not.only_store.and..not.sparse) then - if (allocated(a)) then - call ArrayStop('PThamiltonian_contract:a') - deallocate (a) - endif - ! - enddo + enddo + endif!mpiio ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then ! @@ -14478,6 +14484,7 @@ end subroutine matrix_transform ! Contracted matrix elements ! subroutine PTcontracted_matelem_class(jrot) + use coarray_aux ! implicit none ! @@ -14489,11 +14496,18 @@ subroutine PTcontracted_matelem_class(jrot) real(rk),allocatable :: mat_t(:,:), grot_t(:,:),extF_t(:,:),gvib_t(:,:),hvib_t(:,:),fvib_t(:,:),& matclass(:,:,:),hrot_t(:,:),gcor_t(:,:) real(rk),allocatable :: gcor_(:,:,:,:),grot_(:,:,:,:),extF_dvr(:,:,:),extF_r(:,:) + real(rk),allocatable :: recvbuf(:,:,:) ! real(rk) :: f_t integer(ik) :: isize,iroot integer(ik) :: dimen_p,nroots,chkptIO,extF_rank,chkptIO_ - integer(hik) :: rootsize,rootsize_,matsize + integer(hik) :: rootsize,rootsize_,matsize,blocksize + integer :: startdim, enddim, blocksize_, ierr, b, req_count, offset + type(MPI_Request),allocatable :: reqs(:) + type(MPI_Status) :: reqstat + type(MPI_File) :: chkptMPIIO + integer(kind=MPI_Offset_kind) :: mpioffset + integer :: mpisz ! logical :: treat_rotation =.false. ! switch off/on the rotation logical :: treat_vibration =.true. ! switch off/on the vibration @@ -14502,7 +14516,7 @@ subroutine PTcontracted_matelem_class(jrot) double precision,parameter :: alpha = 1.0d0,beta=0.0d0 character(len=cl) :: job_is,buf ! - integer(ik) :: dimen_p_max,nroots_max,imu,mdimen,mdimen_ + integer(ik) :: dimen_p_max,nroots_max,imu,mdimen,mdimen_,mdimen_b,mdimen_p integer(ik) :: iterm1=0,iterm2=12 integer(ik) :: icoeff,icase,ilambda,jcoeff,idvr integer(ik),allocatable :: extF_N(:),icoeff2iroot(:,:) @@ -14578,6 +14592,14 @@ subroutine PTcontracted_matelem_class(jrot) mdimen = PT%Maxcontracts ! rootsize = int(mdimen*mdimen,hik) + call co_init_distr(mdimen, startdim, enddim, blocksize_) + allocate(reqs(comm_size)) + write(*,*) "SENDRECV:", proc_rank, send_or_recv + + mdimen_p = int(1+real(mdimen/comm_size)) + mdimen_b = comm_size*mdimen_p + write(*,*) "DIMS", proc_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p + blocksize = blocksize_ ! ! The vibrational (J=0) matrix elements of the rotational and coriolis ! kinetic parts are retrieved now from the storage place (check_point). @@ -14604,16 +14626,31 @@ subroutine PTcontracted_matelem_class(jrot) ! ! Prepare the checkpoint file ! - job_is ='Vib. matrix elements of the rot. kinetic part' - call IOStart(trim(job_is),chkptIO) + !POSIXIO!job_is ='Vib. matrix elements of the rot. kinetic part' + !POSIXIO!call IOStart(trim(job_is),chkptIO) - open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kinetmat_file) - write(chkptIO) 'Start Kinetic part' - ! - ! store the bookkeeping information about the contr. basis set - ! - call PTstore_icontr_cnu(PT%Maxcontracts,chkptIO,job%IOkinet_action) + !POSIXIO!open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kinetmat_file) + !POSIXIO!write(chkptIO) 'Start Kinetic part' + !POSIXIO!! + !POSIXIO!! store the bookkeeping information about the contr. basis set + !POSIXIO!! + !POSIXIO!call PTstore_icontr_cnu(PT%Maxcontracts,chkptIO,job%IOkinet_action) ! + call MPI_File_open(mpi_comm_world, 'mpiiofile', mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) + mpioffset=0 + call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) + if (proc_rank.eq.0) then !AT + call TimerStart('mpiiosingle') !AT + + call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) + ! + ! store the bookkeeping information about the contr. basis set + ! + call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) + + call TimerStop('mpiiosingle') !AT + endif endif ! ! maximal size of the primitive matrix in all classes @@ -14727,7 +14764,7 @@ subroutine PTcontracted_matelem_class(jrot) ! if (job%verbose>=4) write(out,"(' allocating hvib, ',i9,' elements...')") rootsize ! - allocate(hvib%me(mdimen,mdimen),stat=alloc) + allocate(hvib%me(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) call ArrayStart('gvib-grot-gcor-fields',alloc,1,kind(f_t),rootsize) hvib%me = 0 ! @@ -14780,7 +14817,9 @@ subroutine PTcontracted_matelem_class(jrot) endif ! ! - allocate(grot_t(mdimen,mdimen),hrot_t(mdimen,mdimen),gcor_t(mdimen,mdimen),stat=alloc) + allocate(recvbuf(mdimen_p,mdimen_p,comm_size),stat=alloc) + allocate(grot_t(mdimen_b,startdim:startdim+mdimen_p-1),hrot_t(mdimen_b,startdim:startdim+mdimen_p-1), & + gcor_t(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) call ArrayStart('grot-gcor-fields',alloc,1,kind(f_t),rootsize) call ArrayStart('grot-gcor-fields',alloc,1,kind(f_t),rootsize) call ArrayStart('grot-gcor-fields',alloc,1,kind(f_t),rootsize) @@ -14789,7 +14828,8 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! - write(chkptIO) 'g_rot' + !POSIXIO!write(chkptIO) 'g_rot' + if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) ! endif ! @@ -14801,6 +14841,7 @@ subroutine PTcontracted_matelem_class(jrot) islice = 0 job_is = 'grot' ! + call co_create_type(mdimen) do k1 = 1,3 do k2 = 1,3 ! @@ -14821,37 +14862,35 @@ subroutine PTcontracted_matelem_class(jrot) ! call calc_contract_matrix_elements_II(iterm,k1,k2,fl,hrot_t,grot_contr_matelem_single_term) ! - !$omp parallel do private(icoeff,jcoeff) shared(grot_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! enddo ! - !$omp parallel do private(icoeff,jcoeff) shared(grot_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff-1 - grot_t(icoeff,jcoeff) = grot_t(jcoeff,icoeff) - enddo - enddo - !$omp end parallel do ! - if (trim(job%IOkinet_action)=='SAVE') then - if (job%IOmatelem_split) then - ! - call write_divided_slice(islice,'g_rot',job%matelem_suffix,mdimen,grot_t) - ! - else - ! - ! store the matrix elements - ! - write(chkptIO) grot_t - ! - endif - endif + !POSIXIO!if (trim(job%IOkinet_action)=='SAVE') then + !POSIXIO! if (job%IOmatelem_split) then + !POSIXIO! ! + !POSIXIO! call write_divided_slice(islice,'g_rot',job%matelem_suffix,mdimen,grot_t) + !POSIXIO! ! + !POSIXIO! else + !POSIXIO! ! + !POSIXIO! ! store the matrix elements + !POSIXIO! ! + !POSIXIO! write(chkptIO) grot_t + !POSIXIO! ! + !POSIXIO! endif + !POSIXIO!endif + call co_write_matrix_distr(grot_t,mdimen, startdim, enddim,chkptMPIIO) ! enddo enddo @@ -14860,7 +14899,8 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! - write(chkptIO) 'g_cor' + !POSIXIO!write(chkptIO) 'g_cor' + if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) ! endif ! @@ -14897,23 +14937,25 @@ subroutine PTcontracted_matelem_class(jrot) ! call calc_contract_matrix_elements_II(iterm,k1,k2,fl,hrot_t,gcor_contr_matelem_single_term) ! - !$omp parallel do private(icoeff,jcoeff) shared(grot_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! enddo - !$omp parallel do private(icoeff,jcoeff) shared(grot_t) schedule(dynamic) - do icoeff=1,mdimen + call co_distr_data(grot_t, recvbuf, mdimen_p, startdim, enddim) + do icoeff=startdim,enddim do jcoeff=1,icoeff-1 - grot_t(icoeff,jcoeff) = grot_t(jcoeff,icoeff) - grot_t(jcoeff,icoeff) = -grot_t(jcoeff,icoeff) + grot_t(jcoeff,icoeff) = -1*grot_t(jcoeff,icoeff) enddo enddo - !$omp end parallel do ! if (job%IOmatelem_divide) then ! @@ -14922,7 +14964,7 @@ subroutine PTcontracted_matelem_class(jrot) else ! !$omp parallel do private(icoeff) shared(gcor_t) schedule(dynamic) - do icoeff=1,mdimen + do icoeff=startdim,enddim gcor_t(icoeff,:) = gcor_t(icoeff,:)+grot_t(icoeff,:) enddo !$omp end parallel do @@ -14931,20 +14973,21 @@ subroutine PTcontracted_matelem_class(jrot) ! enddo ! - if (trim(job%IOkinet_action)=='SAVE') then - ! - if (job%IOmatelem_split) then - ! - call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,gcor_t) - ! - else - ! - ! store the matrix elements - ! - write(chkptIO) gcor_t - ! - endif - endif + !POSIXIO!if (trim(job%IOkinet_action)=='SAVE') then + !POSIXIO! ! + !POSIXIO! if (job%IOmatelem_split) then + !POSIXIO! ! + !POSIXIO! call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,gcor_t) + !POSIXIO! ! + !POSIXIO! else + !POSIXIO! ! + !POSIXIO! ! store the matrix elements + !POSIXIO! ! + !POSIXIO! write(chkptIO) gcor_t + !POSIXIO! ! + !POSIXIO! endif + !POSIXIO!endif + call co_write_matrix_distr(gcor_t,mdimen, startdim, enddim,chkptMPIIO) ! enddo ! @@ -15006,36 +15049,36 @@ subroutine PTcontracted_matelem_class(jrot) ! if (job%verbose>=2) write(out,"('...end!')") ! - if (treat_rotation.and.trim(job%IOkinet_action)=='SAVE') then - ! - ! store the rotational matrix elements - ! - write(chkptIO) 'g_rot' - ! - do k1 = 1,3 - do k2 = 1,3 - ! - write(chkptIO) grot_(k1,k2,:,:) - ! - enddo - enddo - ! - write(chkptIO) 'g_cor' - ! - ! store the Coriolis matrix elements - ! - do k1 = 1,PT%Nmodes - do k2 = 1,3 - ! - write(chkptIO) gcor_(k1,k2,:,:) - ! - enddo - enddo - ! - deallocate(grot_,gcor_) - call ArrayStop('grot-gcor-fields') - ! - endif + !POSIXIO!if (treat_rotation.and.trim(job%IOkinet_action)=='SAVE') then + !POSIXIO! ! + !POSIXIO! ! store the rotational matrix elements + !POSIXIO! ! + !POSIXIO! write(chkptIO) 'g_rot' + !POSIXIO! ! + !POSIXIO! do k1 = 1,3 + !POSIXIO! do k2 = 1,3 + !POSIXIO! ! + !POSIXIO! write(chkptIO) grot_(k1,k2,:,:) + !POSIXIO! ! + !POSIXIO! enddo + !POSIXIO! enddo + !POSIXIO! ! + !POSIXIO! write(chkptIO) 'g_cor' + !POSIXIO! ! + !POSIXIO! ! store the Coriolis matrix elements + !POSIXIO! ! + !POSIXIO! do k1 = 1,PT%Nmodes + !POSIXIO! do k2 = 1,3 + !POSIXIO! ! + !POSIXIO! write(chkptIO) gcor_(k1,k2,:,:) + !POSIXIO! ! + !POSIXIO! enddo + !POSIXIO! enddo + !POSIXIO! ! + !POSIXIO! deallocate(grot_,gcor_) + !POSIXIO! call ArrayStop('grot-gcor-fields') + !POSIXIO! ! + !POSIXIO!endif ! else ! if (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) then ! @@ -15044,10 +15087,11 @@ subroutine PTcontracted_matelem_class(jrot) if (job%verbose>=2) write(out,"(/'Vibrational kinetic part...')") if (job%verbose>=3) write(out,"(/'Number of gvib terms = ',i8)") gvib_N ! - allocate(hvib_t(mdimen,mdimen),gvib_t(mdimen,mdimen),fvib_t(mdimen,mdimen),stat=alloc) - call ArrayStart('hvib-fields',alloc,1,kind(f_t),rootsize) - call ArrayStart('hvib-fields',alloc,1,kind(f_t),rootsize) - call ArrayStart('hvib-fields',alloc,1,kind(f_t),rootsize) + allocate(hvib_t(mdimen_b,startdim:startdim+mdimen_p-1),gvib_t(mdimen_b,startdim:startdim+mdimen_p-1), & + fvib_t(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) + call ArrayStart('hvib-fields',alloc,1,kind(f_t),blocksize) + call ArrayStart('hvib-fields',alloc,1,kind(f_t),blocksize) + call ArrayStart('hvib-fields',alloc,1,kind(f_t),blocksize) ! islice = 0 ! @@ -15081,18 +15125,22 @@ subroutine PTcontracted_matelem_class(jrot) ! call calc_contract_matrix_elements_II(iterm,k1,k2,fl,fvib_t,gvib_contr_matelem_single_term) ! - !$omp parallel do private(icoeff,jcoeff) shared(gvib_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) - ! - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! enddo ! if (job%IOmatelem_divide) then + !TODO ! !$omp parallel do private(icoeff,jcoeff) shared(gvib_t) schedule(dynamic) do icoeff=1,mdimen @@ -15106,13 +15154,17 @@ subroutine PTcontracted_matelem_class(jrot) ! else ! - !$omp parallel do private(icoeff,jcoeff) shared(hvib_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)-0.5_rk*gvib_t(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)-0.5_rk*gvib_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! endif ! @@ -15140,13 +15192,17 @@ subroutine PTcontracted_matelem_class(jrot) ! call calc_contract_matrix_elements_II(iterm,1,1,fl,fvib_t,poten_contr_matelem_single_term) ! - !$omp parallel do private(icoeff,jcoeff) shared(gvib_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! enddo ! @@ -15216,16 +15272,20 @@ subroutine PTcontracted_matelem_class(jrot) ! if ( .not.job%IOmatelem_divide.and..not.job%IOmatelem_split ) then ! - !$omp parallel do private(icoeff,jcoeff) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - hvib%me(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)+gvib_t(jcoeff,icoeff) - hvib%me(icoeff,jcoeff) = hvib%me(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + hvib%me(jcoeff,icoeff) = hvib_t(jcoeff,icoeff) + gvib_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! endif + call co_distr_data(hvib%me, recvbuf, mdimen_p, startdim, enddim) ! deallocate(hvib_t,gvib_t,fvib_t) call ArrayStop('hvib-fields') @@ -15240,8 +15300,10 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_divide.or.job%iswap(1)==0) .and. & (.not.job%IOmatelem_split.or.job%iswap(1)==0)) then ! - write(chkptIO) 'hvib' - write(chkptIO) hvib%me + !POSIXIO!write(chkptIO) 'hvib' + !POSIXIO!write(chkptIO) hvib%me + if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) + call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) ! endif ! @@ -15253,8 +15315,10 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_divide.or.job%iswap(1)==0 ).and. & (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) ) then ! - write(chkptIO) 'End Kinetic part' - close(chkptIO,status='keep') + !POSIXIO!write(chkptIO) 'End Kinetic part' + !POSIXIO!close(chkptIO,status='keep') + if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) + call MPI_File_close(chkptMPIIO, ierr) ! endif ! @@ -15280,22 +15344,37 @@ subroutine PTcontracted_matelem_class(jrot) ! Prepare the checkpoint file ! job_is ='external field contracted matrix elements for J=0' - call IOStart(trim(job_is),chkptIO) - ! - open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%extFmat_file) - write(chkptIO) 'Start external field' - ! - ! store the matrix elements - ! - write(chkptIO) PT%Maxcontracts + !POSIXIO!call IOStart(trim(job_is),chkptIO) + !POSIXIO!! + !POSIXIO!open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%extFmat_file) + !POSIXIO!write(chkptIO) 'Start external field' + !POSIXIO!! + !POSIXIO!! store the matrix elements + !POSIXIO!! + !POSIXIO!write(chkptIO) PT%Maxcontracts + call MPI_File_open(mpi_comm_world, 'mpiioEXTfile', mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + mpioffset=0 + call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) + if (proc_rank.eq.0) then !AT + call TimerStart('mpiiosingle') !AT + + call MPI_File_write(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) + ! + ! store the bookkeeping information about the contr. basis set + ! + call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) + + call TimerStop('mpiiosingle') !AT + ! + endif ! endif ! if (trove%FBR) then ! - allocate(extF_t(mdimen,mdimen),extF_r(mdimen,mdimen),stat=alloc) - call ArrayStart('extF-fields',alloc,1,kind(f_t),rootsize) - call ArrayStart('extF-fields',alloc,1,kind(f_t),rootsize) + allocate(extF_t(mdimen_b,startdim:startdim+mdimen_p-1),extF_r(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) + call ArrayStart('extF-fields',alloc,1,kind(f_t),blocksize) + call ArrayStart('extF-fields',alloc,1,kind(f_t),blocksize) ! job_is = 'externalF' ! @@ -15314,37 +15393,37 @@ subroutine PTcontracted_matelem_class(jrot) ! call calc_contract_matrix_elements_II(iterm,imu,1,fl,extF_r,extF_contr_matelem_single_term) ! - !$omp parallel do private(icoeff,jcoeff) shared(extF_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - extF_t(jcoeff,icoeff) = extF_t(jcoeff,icoeff) + extF_r(jcoeff,icoeff) - enddo + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + extF_t(jcoeff,icoeff) = extF_t(jcoeff,icoeff) + extF_r(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! enddo ! - !$omp parallel do private(icoeff,jcoeff) shared(extF_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff-1 - extF_t(icoeff,jcoeff) = extF_t(jcoeff,icoeff) - enddo - enddo - !$omp end parallel do - ! - if (job%IOextF_divide) then - ! - call write_divided_slice(imu,'extF',job%extmat_suffix,mdimen,extF_t) - ! - else - ! - ! always store the matrix elements of the extF moment - ! - write(chkptIO) imu - ! - write(chkptIO) extF_t - ! - endif + call co_distr_data(extF_t, recvbuf, mdimen_p, startdim, enddim) + ! + !POSIXIO!if (job%IOextF_divide) then + !POSIXIO! ! + !POSIXIO! call write_divided_slice(imu,'extF',job%extmat_suffix,mdimen,extF_t) + !POSIXIO! ! + !POSIXIO!else + !POSIXIO! ! + !POSIXIO! ! always store the matrix elements of the extF moment + !POSIXIO! ! + !POSIXIO! write(chkptIO) imu + !POSIXIO! ! + !POSIXIO! write(chkptIO) extF_t + !POSIXIO! ! + !POSIXIO!endif + if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) + call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) ! if (job%verbose>=4) write(out,"('...done')",advance='YES') ! @@ -15385,7 +15464,11 @@ subroutine PTcontracted_matelem_class(jrot) ! endif ! - if (.not.job%IOextF_divide) write(chkptIO) 'End external field' + !POSIXIO!if (.not.job%IOextF_divide) write(chkptIO) 'End external field' + if (proc_rank.eq.0) then !AT + if(.not.job%IOextF_divide) call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) + endif + call MPI_File_close(chkptMPIIO, ierr) ! endif ! @@ -32906,7 +32989,39 @@ subroutine PTstore_icontr_cnu(Maxcontracts,iunit,dir) ! end subroutine PTstore_icontr_cnu - + subroutine ptstorempi_icontr_cnu(maxcontracts,iunit,dir) + use coarray_aux + + integer(ik),intent(in) :: maxcontracts + type(mpi_file),intent(in) :: iunit + character(len=18),intent(in) :: dir + integer(ik) :: alloc + character(len=18) :: buf18 + integer(ik) :: ncontr + integer(ik),allocatable :: imat_t(:,:) + integer::ierr + ! + select case(trim(dir)) + ! + case ('SAVE') + ! + call mpi_file_write(iunit, maxcontracts, 1,mpi_integer,mpi_status_ignore,ierr) + ! + call mpi_file_write(iunit, 'icontr_cnu', 10,mpi_character,mpi_status_ignore,ierr) + ! + call mpi_file_write(iunit, pt%icontr_cnu(0:pt%nclasses,1:maxcontracts), (1+pt%nclasses)*maxcontracts, mpi_integer, & + mpi_status_ignore, ierr) + ! + call mpi_file_write(iunit, 'icontr_ideg', 11,mpi_character,mpi_status_ignore,ierr) + ! + call mpi_file_write(iunit, pt%icontr_ideg(0:pt%nclasses,1:maxcontracts), (1+pt%nclasses)*maxcontracts, mpi_integer, & + mpi_status_ignore, ierr) + + end select + + end subroutine ptstorempi_icontr_cnu + + subroutine PTdefine_contr_from_eigenvect(nroots,Neigenlevels,eigen) integer(ik),intent(in) :: nroots,Neigenlevels diff --git a/trove.f90 b/trove.f90 index e6b6149..6c7305e 100644 --- a/trove.f90 +++ b/trove.f90 @@ -13,6 +13,7 @@ module tp_module use dipole, only: dm_tranint,dm_analysis_density use refinement, only : refinement_by_fit,external_expectation_values use tran, only : TRconvert_matel_j0_eigen,TRconvert_repres_J0_to_contr + use coarray_aux, only: proc_rank,co_init_comms,co_finalize_comms implicit none @@ -184,9 +185,12 @@ subroutine ptmain return endif ! + call co_init_comms() if (job%contrci_me_fast) then ! - call PTstore_contr_matelem(j) + if (proc_rank.eq.0) then + call PTstore_contr_matelem(j) + endif ! call PTcontracted_matelem_class_fast(j) ! @@ -200,7 +204,7 @@ subroutine ptmain ! ! convert to j=0 representation as part of the first step j=0 calculation ! - if (job%convert_model_j0) then + if (proc_rank.eq.0 .and. job%convert_model_j0) then ! call TRconvert_repres_J0_to_contr(j) call TRconvert_matel_j0_eigen(j) @@ -216,6 +220,7 @@ subroutine ptmain if (job%verbose>=2) then write(out,"(/'End of TROVE')") endif + call co_finalize_comms() ! end subroutine ptmain From 74a221aaf52b7c3ce27da6a97cf846d52785d22e Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 18 Feb 2019 16:32:26 +0000 Subject: [PATCH 04/79] Fix MPI + gfortran, use MPI datatype for data exchange --- coarray_aux.f90 | 80 ++++++++++++++------------------------- makefile | 11 +++--- perturbation.f90 | 98 ++++++++++++++++++++++++++++++------------------ 3 files changed, 95 insertions(+), 94 deletions(-) diff --git a/coarray_aux.f90 b/coarray_aux.f90 index 1a95dba..3bba30c 100644 --- a/coarray_aux.f90 +++ b/coarray_aux.f90 @@ -1,6 +1,7 @@ module coarray_aux use mpi_f08 use timer + use accuracy implicit none public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr @@ -27,6 +28,7 @@ module coarray_aux integer :: co_startdim, co_enddim logical :: comms_inited = .false., distr_inited=.false. type(MPI_Datatype) :: mpitype_column + type(MPI_Datatype),dimension(:), allocatable :: mpi_blocktype contains @@ -110,8 +112,6 @@ subroutine co_gatherv_double(x) if (.not. comms_inited .or. .not. distr_inited) stop "COMMS NOT INITIALISED" if (comm_size .eq. 1) return - !write(*,*) proc_rank, proc_sizes - call TimerStart('CO_GATHERV_DOUBLE') if (proc_rank.eq.0) then call mpi_gatherv(x, 0, mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) @@ -160,9 +160,6 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) proc_index = proc_rank+1 - ! While co-arraying - !if (this_image().ne.proc_index) stop "coarray/mpi mixup" - allocate(proc_sizes(comm_size),proc_offsets(comm_size),send_or_recv(comm_size),starts(comm_size),ends(comm_size),stat=ierr) if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" @@ -192,7 +189,6 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) starts(comm_size) = (i-1) * localsize_ + 1 ends(comm_size) = dimen!comm_size*localsize_!dimen - !proc_sizes(comm_size) = (dimen-localsize*(comm_size-1)) * dimen proc_sizes(comm_size) = localsize_*comm_size*localsize_!dimen proc_offsets(comm_size) = (comm_size-1)*localsize_*(comm_size*localsize_)!dimen @@ -204,7 +200,6 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) call mpi_bcast(proc_offsets, comm_size, mpi_integer, 0, mpi_comm_world) - !if (proc_rank.eq.0) write(*,*) "PROC_SIZES:", proc_sizes blocksize = proc_sizes(proc_index) startdim = starts(proc_index) @@ -213,37 +208,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) co_startdim = startdim co_enddim = enddim - !if (mod(comm_size,2)) then - ! do i=1,comm_size - ! if (i.eq.proc_index) then - ! send_or_recv(i) = 0 - ! !elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & - ! elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & - ! ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then - ! send_or_recv(i) = 1 ! send - ! else - ! send_or_recv(i) = -1 ! recv - ! endif - ! end do - !else - ! do i=1,comm_size - ! if (mod(i,2)) then - ! to_calc = comm_size/2+1 - ! else - ! to_calc = comm_size/2 - ! endif - - ! if (i.eq.proc_index) then - ! send_or_recv(i) = 0 - ! !elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & - ! elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & - ! ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then - ! send_or_recv(i) = 1 ! send - ! else - ! send_or_recv(i) = -1 ! recv - ! endif - ! end do - !endif + allocate(mpi_blocktype(comm_size)) do i=1,comm_size if (mod(comm_size,2).eq.1) then @@ -258,24 +223,18 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) endif endif - !if (proc_rank .eq. 0) write(*,*) "TOCALC:", i, to_calc, comm_size, mod(comm_size,4) if (i.eq.proc_index) then send_or_recv(i) = 0 - !!!!!!!!elseif ( (i .ge. (proc_index - to_calc) .and. i .lt. proc_index) ) .or. & elseif ( ((i.gt.(proc_index - to_calc) .and. i.lt.proc_index)) .or. & ((proc_index-to_calc).lt.1 .and. (i-comm_size).gt.(proc_index-to_calc))) then send_or_recv(i) = 1 ! send - !elseif (((i.gt.proc_index .and. i.lt.(proc_index+to_calc))) .or. (proc_index+to_calc.gt.comm_size .and. i.lt.mod(proc_index+to_calc,comm_size))) then - send_or_recv(i) = 1 ! send + call co_create_type_subarray(int(1+real(dimen/comm_size)), blocksize, int(1+real(dimen/comm_size)), i, mpi_blocktype(i)) else send_or_recv(i) = -1 ! recv endif end do - !write(*,*) "SENDRECV:", proc_index, send_or_recv - - endif deallocate(starts,ends) @@ -291,19 +250,18 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) real(rk),dimension(:,:,:),intent(inout) :: tmp integer,intent(in) :: blocksize, lb, ub - integer :: i, icoeff, jcoeff, offset, ierr + integer :: i, icoeff, jcoeff, offset, ierr, k type(MPI_Request) :: reqs(comm_size) - - !!!write(*,*) "DISTR1", proc_rank, send_or_recv - !!!write(*,*) "DISTR2", proc_rank, blocksize, lb, ub - !!!write(*,*) "DISTR3", proc_rank, shape(x), shape(tmp) call TimerStart('MPI_transpose') call TimerStart('MPI_transpose_sendrecv') + do i=1,comm_size + reqs(i)= MPI_REQUEST_NULL + end do do i=1,comm_size if (send_or_recv(i).eq.1) then - call mpi_isend(x(((i-1)*blocksize)+1:i*blocksize,:),blocksize*blocksize,mpi_double_precision,i-1,0,mpi_comm_world,reqs(i),ierr) + call mpi_isend(x,1,mpi_blocktype(i),i-1,0,mpi_comm_world,reqs(i),ierr) elseif (send_or_recv(i).eq.-1) then call mpi_irecv(tmp(:,:,i),blocksize*blocksize,mpi_double_precision,i-1,mpi_any_tag,mpi_comm_world,reqs(i),ierr) else @@ -318,7 +276,7 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) do i=1,comm_size if (send_or_recv(i).eq.-1) then offset = (i-1)*blocksize - !$omp parallel do private(icoeff,jcoeff) shared(i,x) schedule(static) + !$omp parallel do private(icoeff,jcoeff) shared(i,x,tmp,lb,ub,offset,blocksize) schedule(static) do icoeff=lb,ub do jcoeff=offset+1,offset+blocksize x(jcoeff,icoeff) = tmp(icoeff-lb+1,jcoeff-offset,i) @@ -372,4 +330,22 @@ subroutine co_create_type(extent) end subroutine co_create_type + subroutine co_create_type_subarray(extent, coldim, rowdim, blockid, mpi_newtype) + integer,intent(in) :: extent, coldim, rowdim, blockid + type(MPI_Datatype),intent(inout) :: mpi_newtype + integer,dimension(2) :: array_of_sizes, array_of_subsizes, array_of_starts + integer :: ierr + + array_of_sizes(1) = comm_size * extent!coldim + array_of_sizes(2) = extent + array_of_subsizes(:) = extent + array_of_starts(1) = (blockid - 1) * extent + 0 + array_of_starts(2) = 0 + + + call MPI_Type_create_subarray(2, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_FORTRAN, mpi_double_precision, mpi_newtype, ierr) + call MPI_Type_commit(mpi_newtype, ierr) + + end subroutine co_create_type_subarray + end module diff --git a/makefile b/makefile index f439d62..ac5ed83 100644 --- a/makefile +++ b/makefile @@ -13,13 +13,15 @@ pot_user = pot_ch4 PLAT = _2205_i17 ###FOR = ifort FOR = mpif90 -FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 +FFLAGS = -qopenmp -xcore-avx2 -O3 -ip -g3 +#FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a -#LAPACK = -mkl -LAPACK = -lopenblas +#LAPACK = -L/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-5.4.0/openblas-0.2.20-gbzlk5wei7fsojje2fiwj7w5wssikb73/lib -lopenblas +LAPACK = -mkl +#LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl LIB = $(LAPACK) @@ -75,10 +77,9 @@ pot_xy3.o: accuracy.o moltype.o pot_xy4.o: accuracy.o moltype.o symmetry.o pot_zxy2.o: accuracy.o moltype.o pot_zxy3.o: accuracy.o moltype.o -pot_c2h6.o: accuracy.o moltype.o +pot_c2h6.o: accuracy.o moltype.o mol_c2h6.o pot_ch3oh.o: accuracy.o moltype.o pot_c2h4.o: accuracy.o moltype.o -pot_c2h6.o: accuracy.o moltype.o pot_c3h6.o: accuracy.o moltype.o pot_abcd.o: accuracy.o moltype.o lapack.o diff --git a/perturbation.f90 b/perturbation.f90 index 1c6080e..7f34b7f 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6866,7 +6866,7 @@ subroutine PThamiltonian_contract(jrot) ! task = 'rot' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - task = 'cor' + !task = 'cor' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) ! call TimerStop('Restoring KE matrix') @@ -7620,7 +7620,7 @@ subroutine PThamiltonian_contract(jrot) ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then ! - call check_point_active_space('CLOSE') + if(proc_rank.eq.0) call check_point_active_space('CLOSE') ! endif ! @@ -8281,6 +8281,7 @@ end subroutine PTrestore_rot_kinetic_matrix_elements ! for the K-factorized rotational basis ! recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_diagonalization) + use coarray_aux integer(ik),intent(in) :: jrot,irow,ijterm(:,:) real(rk),external :: func @@ -8294,6 +8295,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr real(rk) :: hcontr(PT%max_deg_size,PT%max_deg_size) real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) + logical :: escape ! !call TimerStart('Symmetrized Hamiltonian - one column') ! @@ -8304,6 +8306,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di isize = PT%Index_deg(irow)%size1 ! do jrow = 1,irow + escape = .false. ! if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle ! @@ -8331,11 +8334,17 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! ! Matrix elements ! + if (jcontr .lt. co_startdim .or. jcontr .gt. co_enddim) then + escape = .true. + exit + endif + hcontr(ideg,jdeg) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) ! enddo ! enddo + if (escape) cycle ! do isym = 1,sym%Nrepresen ! @@ -14841,6 +14850,8 @@ subroutine PTcontracted_matelem_class(jrot) islice = 0 job_is = 'grot' ! + ! create column datatype for MPI-IO + ! TODO clean up call co_create_type(mdimen) do k1 = 1,3 do k2 = 1,3 @@ -14867,6 +14878,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) enddo enddo @@ -14876,6 +14888,7 @@ subroutine PTcontracted_matelem_class(jrot) ! enddo ! + call co_distr_data(grot_t, recvbuf, mdimen_p, startdim, enddim) ! !POSIXIO!if (trim(job%IOkinet_action)=='SAVE') then !POSIXIO! if (job%IOmatelem_split) then @@ -14942,6 +14955,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) enddo enddo @@ -15111,7 +15125,7 @@ subroutine PTcontracted_matelem_class(jrot) if (islice=4) write(out,"('k1,k2 = ',2i8)") k1,k2 + if (job%verbose>=4) write(out,"('k1,k2 = ',3i8)") k1,k2 ! gvib_N = FLread_fields_dimension_field(job_is,k1,k2) ! @@ -15130,6 +15144,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) enddo enddo @@ -15159,6 +15174,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)-0.5_rk*gvib_t(jcoeff,icoeff) enddo enddo @@ -15197,6 +15213,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle gvib_t(jcoeff,icoeff) = gvib_t(jcoeff,icoeff) + fvib_t(jcoeff,icoeff) enddo enddo @@ -15277,6 +15294,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle hvib%me(jcoeff,icoeff) = hvib_t(jcoeff,icoeff) + gvib_t(jcoeff,icoeff) enddo enddo @@ -15398,6 +15416,7 @@ subroutine PTcontracted_matelem_class(jrot) !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle extF_t(jcoeff,icoeff) = extF_t(jcoeff,icoeff) + extF_r(jcoeff,icoeff) enddo enddo @@ -15664,7 +15683,7 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) type(PTcoeffT),pointer :: fl !real(rk),intent(in) :: Hobject(0:,0:) !integer(ik),intent(in) :: IndexQ(:) - real(rk),intent(inout) :: field(:,:) + real(rk),intent(inout) :: field(:,startdim:) real(rk),external :: func ! !real(rk) :: matclass(1:,1:,1:) @@ -15764,45 +15783,50 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) ! if (job%verbose>=4) call TimerStart('contract_matrix_sum_field') ! - !$omp parallel do private(icoeff,jcoeff,f_t,iclasses,iroot,jroot) shared(field) schedule(dynamic) - do icoeff=1,Maxcontracts - ! - !icase = PT%icontr2icase(icoeff,1) - !ilambda = PT%icontr2icase(icoeff,2) - ! - !ib0 = int(icoeff*(icoeff-1),hik)/2 - ! - !ib0 = icoefficoeff1(icoeff) - ! - do jcoeff=1,icoeff - ! - iroot = icoeff2iroot(1,icoeff) - jroot = icoeff2iroot(1,jcoeff) - ! - f_t = matclass(1,iroot,jroot) - ! - !f_t = 1.0_rk - ! - do iclasses = 2,Nclasses + do b=1,comm_size + if (send_or_recv(b) .ge. 0) then + !$omp parallel do private(icoeff,jcoeff,f_t,iclasses,iroot,jroot) shared(field) schedule(dynamic) + do icoeff=startdim,enddim ! - iroot = icoeff2iroot(iclasses,icoeff) - jroot = icoeff2iroot(iclasses,jcoeff) + !icase = PT%icontr2icase(icoeff,1) + !ilambda = PT%icontr2icase(icoeff,2) ! - !f_prod(iclasses) = mat_tt(iclasses)%coeffs(iroot,jroot) + !ib0 = int(icoeff*(icoeff-1),hik)/2 ! - f_t = f_t*matclass(iclasses,iroot,jroot) + !ib0 = icoefficoeff1(icoeff) ! + do jcoeff=(b-1)*mdimen_p+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle + ! + iroot = icoeff2iroot(1,icoeff) + jroot = icoeff2iroot(1,jcoeff) + ! + f_t = matclass(1,iroot,jroot) + ! + !f_t = 1.0_rk + ! + do iclasses = 2,Nclasses + ! + iroot = icoeff2iroot(iclasses,icoeff) + jroot = icoeff2iroot(iclasses,jcoeff) + ! + !f_prod(iclasses) = mat_tt(iclasses)%coeffs(iroot,jroot) + ! + f_t = f_t*matclass(iclasses,iroot,jroot) + ! + enddo + ! + !f_t = product(matclass(1:Nclasses,icoeff2iroot(1:Nclasses,icoeff),icoeff2iroot(1:Nclasses,jcoeff))) + ! + !field(jcoeff,icoeff) = field(jcoeff,icoeff)+f_t + ! + field(jcoeff,icoeff) = f_t + ! + enddo enddo - ! - !f_t = product(matclass(1:Nclasses,icoeff2iroot(1:Nclasses,icoeff),icoeff2iroot(1:Nclasses,jcoeff))) - ! - !field(jcoeff,icoeff) = field(jcoeff,icoeff)+f_t - ! - field(jcoeff,icoeff) = f_t - ! - enddo + !$omp end parallel do + endif enddo - !$omp end parallel do ! if (job%verbose>=4) call TimerStop('contract_matrix_sum_field') ! From f121d38c3af94b2a0417715bffa5c2a542524e3f Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 22 Feb 2019 15:21:56 +0000 Subject: [PATCH 05/79] co_write_matrix_distr: don't write out empty buffer columns --- coarray_aux.f90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/coarray_aux.f90 b/coarray_aux.f90 index 3bba30c..d92c4c0 100644 --- a/coarray_aux.f90 +++ b/coarray_aux.f90 @@ -313,8 +313,12 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) mpioffset = mpioffset + proc_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) - writecount = int(1+real(longdim/comm_size)) mpi_write_offsetkind = int(1+real(longdim/comm_size),MPI_Offset_kind) + if (proc_rank .lt. (comm_size-1)) then + writecount = int(1+real(longdim/comm_size)) + else + writecount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) + endif call MPI_File_write_at_all(outfile,mpioffset,x,writecount,mpitype_column,writestat,ierr) call TimerStop('MPI_write') From 970c162437c2364ed8008ac37cfdcaeb85691d8a Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 22 Feb 2019 15:29:59 +0000 Subject: [PATCH 06/79] perturbation.f90 - WIP MPI-IO file formatting work + write fixes --- perturbation.f90 | 662 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 656 insertions(+), 6 deletions(-) diff --git a/perturbation.f90 b/perturbation.f90 index 7f34b7f..b1b366f 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6532,6 +6532,7 @@ subroutine PThamiltonian_contract(jrot) character(len=cl) :: unitfname,filename,statusf,symchar logical :: only_store = .false. logical :: no_diagonalization = .false. + type(MPI_File) :: mpiiofile ! ! A special case when the diagonlization is to be skipped ! @@ -6849,6 +6850,9 @@ subroutine PThamiltonian_contract(jrot) ! task = 'top' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ! PT, PTvibrational_me_calc,grot,gcor,hvib, & + ! ncontr,maxcontr) ! ! We have two calculation options: fast and cheap and slow but expensive. ! @@ -6866,8 +6870,14 @@ subroutine PThamiltonian_contract(jrot) ! task = 'rot' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - !task = 'cor' + !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ! PT, PTvibrational_me_calc,grot,gcor,hvib, & + ! ncontr,maxcontr) + task = 'cor' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ! PT, PTvibrational_me_calc,grot,gcor,hvib, & + ! ncontr,maxcontr) ! call TimerStop('Restoring KE matrix') ! @@ -6883,7 +6893,10 @@ subroutine PThamiltonian_contract(jrot) ! task = 'vib' call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - ! + !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ! PT, PTvibrational_me_calc,grot,gcor,hvib, & + ! ncontr,maxcontr) + call TimerStop('Restoring KE matrix') ! if (job%verbose>=5) write(out,"(/' ...done!')") @@ -7650,6 +7663,629 @@ subroutine PThamiltonian_contract(jrot) ! end subroutine PThamiltonian_contract ! + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !!!!!!!!! MPIIO !!!!!!!!!!!!!! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + subroutine open_chkptfile_mpi(fileh, filename, mode) + use mpi_f08 + use coarray_aux + + type(MPI_File),intent(inout) :: fileh + character(len=*),intent(in) :: filename, mode + + integer :: amode, ierr + + select case(mode) + case('read') + amode = mpi_mode_rdonly + case('write') + amode = mpi_mode_wronly+mpi_mode_create + end select + + call MPI_File_Open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) + if (ierr) then + if (proc_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file." + stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error opening MATELEM MPI-IO input file" + endif + + !File errors indicate big trouble, so we set errors to be fatal - not likely to be recoverable + call MPI_File_set_errhandler(fileh, MPI_ERRORS_ARE_FATAL) + end subroutine open_chkptfile_mpi + + subroutine close_chkptfile_mpi(fileh) + use mpi_f08 + use coarray_aux + + type(MPI_File), intent(inout) :: fileh + integer :: ierr + + call mpi_file_close(fileh, ierr) + if (ierr) then + if (proc_rank .eq. 0) write(*,*) "Error closing MPI-IO-formatted Vib. kinetic checkpoint file." + stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error closing MATELEM MPI-IO input file" + endif + end subroutine close_chkptfile_mpi + + subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & + PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr, maxcontr, icontr) + use mpi_f08 + use coarray_aux + integer(ik),intent(in) :: jrot + character(len=cl),intent(in) :: task + type(MPI_File),intent(inout) :: fileh + integer(ik),intent(in) :: dimen + + type(PTelementsT),intent(in) :: PT + logical,intent(in) :: PTvibrational_me_calc + type(PTcontrME),pointer :: grot(:,:),gcor(:) ! rot. kinetic part + type(PTcontrME) :: hvib ! rot. kinetic part + + integer(ik),intent(inout),optional :: ncontr + integer(ik),intent(inout),optional :: maxcontr + integer(ik),intent(in),optional :: icontr + + type(MPI_File) :: fileh_slice + character(len=cl) :: job_id,filename,readbuf + integer(kind=MPI_Offset_kind) :: file_offset + integer :: ierr + integer(hik) :: rootsize,rootsize_,rootsize2,rootsize2_,nprocs,tid,icontr1,icontr2 + integer(ik),allocatable :: imat_t(:,:) + real(rk),allocatable :: mat_t(:,:),mat_(:,:) + real(rk) :: f_t + + integer :: k1,k2,islice + + if ( trim(job%IOkinet_action)/='VIB_READ'.and.trim(job%IOkinet_action)/='READ'.and..not.job%contrci_me_fast) return + + if ( trim(job%IOswap_matelem)/='NONE') return + + if (proc_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i)") icontr + + if (proc_rank .eq. 0) write(*,*) "DEBUG|TASK =", task + + select case (trim(task)) + case('top') + + job_id = '[MPI-IO] Vib. matrix elements of the rot. kinetic' + !TODO - MPI-compatible IOStart + !call IOStart(trim(job_id),fileh) + + !TODO set filename dynamically + filename = 'mpiiofile' + + call open_chkptfile_mpi(fileh, filename, 'read') + + !Collective read woo + call MPI_File_read_all(fileh, readbuf, 7, mpi_character, mpi_status_ignore, ierr) + + if (readbuf(1:7) /= '[MPIIO]') then + if (proc_rank .eq. 0) write(*,*) "Invalid MPIIO identifier to MPI-IO-formatted Vib. kinetic checkpoint file." + call mpi_barrier(mpi_comm_world, ierr) + stop "MPI_PTrestore_rot_kinetic_matrix_elements - bogus file format header" + endif + + call MPI_File_read_all(fileh, readbuf, 18, mpi_character, mpi_status_ignore, ierr) + + if (readbuf(1:18) /= 'Start Kinetic part') then + if (proc_rank .eq. 0) write(*,*) "Invalid header to MPI-IO-formatted Vib. kinetic checkpoint file." + call mpi_barrier(mpi_comm_world, ierr) + stop "MPI_PTrestore_rot_kinetic_matrix_elements - bogus file format" + endif + + call MPI_File_read_all(fileh, ncontr, 1, mpi_integer, mpi_status_ignore, ierr) + + if (jrot==0.and.PT%Maxcontracts/=ncontr) then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file + if (proc_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i)") PT%Maxcontracts,ncontr + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal nroots ' + end if + + rootsize = int(ncontr*(ncontr+1)/2,hik) + rootsize2 = int(ncontr*ncontr,hik) + + if (proc_rank .eq. 0.and.job%verbose>=6) write(out,"(/'Restore_rot_kin...: Number of elements: ',i8)") PT%Maxcontracts + + ! Read the indexes of the J=0 contracted basis set. + + if (.not.FLrotation.or.jrot==0) then + + allocate (imat_t(0:PT%Nclasses,ncontr),stat=ierr) + call ArrayStart('mat_t',ierr,size(imat_t),kind(imat_t)) + + call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:10)/='icontr_cnu') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + end if + + call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + + call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:11)/='icontr_ideg') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + end if + + call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + + deallocate(imat_t) + + else + + allocate (PT%icontr_cnu(0:PT%Nclasses,ncontr),PT%icontr_ideg(0:PT%Nclasses,ncontr),stat=ierr) + call ArrayStart('PT%contractive_space',ierr,size(PT%icontr_cnu),kind(PT%icontr_cnu)) + call ArrayStart('PT%contractive_space',ierr,size(PT%icontr_ideg),kind(PT%icontr_ideg)) + + deallocate(PT%icontr2icase) + call Arraystop('PT%contractive_space') + + allocate (PT%icontr2icase(ncontr,2),stat=ierr) + call ArrayStart('PT%contractive_space',ierr,size(PT%icontr2icase),kind(PT%icontr2icase)) + + call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:10)/='icontr_cnu') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + end if + + call MPI_File_read_all(fileh, PT%icontr_cnu, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + + call MPI_File_read_all(fileh, readbuf, 11, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:11)/='icontr_ideg') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + end if + + call MPI_File_read_all(fileh, PT%icontr_ideg, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + + endif + + if (job%vib_rot_contr.and.PTvibrational_me_calc) then + + call MPI_File_read_all(fileh, readbuf, 7, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:7)/='vib-rot') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': label vib-rot is missing ',a)") job%kinetmat_file,readbuf(1:7) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - vib-rot missing' + end if + + call close_chkptfile_mpi(fileh) + + write(job_id,"('single swap_matrix')") + + ! TODO mpi-aware iostart + !call IOStart(trim(job_is),chkptIO) + + filename = trim(job%matelem_suffix)//"0"//'.chk' + + call open_chkptfile_mpi(fileh, filename, 'read') + + endif + + ! reconstruct the correlation between the vib. indices for J=0 and current J + + !TODO_FOREIGNFUNCTION! call Find_groundstate_icontr(maxcontr) + + if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + + if (proc_rank .eq. 0 .and. job%verbose>=4.and.maxcontr/=ncontr) then + write (out,"(' The contracted basis set is reduced: ',i,' -> ',i)") ncontr,maxcontr + end if + + if (maxcontr>ncontr) then + if (proc_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree (maxcontr,ncontr) ',2i8)") maxcontr,ncontr + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal ncontr ' + end if + case('rot') + ! + if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process rotational part...')") + ! + ! Read the rotational part only + ! + allocate(mat_(maxcontr,maxcontr),stat=ierr) + call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + ! + if (.not.job%IOmatelem_split) then + ! + call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:5)/='g_rot') then + if(proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' + end if + ! + endif + ! + allocate(grot(3,3),stat=ierr) + ! + islice = 0 + ! + do k1 = 1,3 + ! + do k2 = 1,3 + ! + islice = islice + 1 + ! + call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) + ! + allocate(grot(k1,k2)%me(maxcontr,maxcontr),stat=ierr) + call ArrayStart('grot-matrix',ierr,1,kind(f_t),rootsize2_) + ! + call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! + grot(k1,k2)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + ! + call divided_slice_close_mpi(islice,fileh_slice,'g_rot') + ! + enddo + enddo + ! + deallocate(mat_) + call ArrayStop('PThamiltonian_contract: mat_') + ! + if (job%verbose>=4) write(out,"(' ...done!')") + ! + case('cor') + ! + if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process Coriolis part...')") + ! + allocate(mat_(maxcontr,maxcontr),stat=ierr) + call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + ! + if (.not.job%IOmatelem_split) then + ! + call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:5)/='g_cor') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' + end if + ! + endif + ! + allocate(gcor(3),stat=ierr) + ! + islice = 9 + ! + do k1 = 1,3 + ! + islice = islice + 1 + ! + allocate(gcor(k1)%me(maxcontr,maxcontr),stat=ierr) + call ArrayStart('gcor-matrix',ierr,1,kind(f_t),rootsize2_) + ! + call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) + ! + call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! + gcor(k1)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + ! + call divided_slice_close_mpi(islice,fileh_slice,'g_cor') + ! + enddo + ! + deallocate(mat_) + call ArrayStop('PThamiltonian_contract: mat_') + ! + if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + ! + case('vib') + ! + if (job%verbose>=4) write(out,"(' Read and process vibrational part...')") + ! + if (.not.job%IOmatelem_split.and.( (.not.FLrotation.or.jrot==0).and.trim(job%IOkinet_action)/='VIB_READ' ) ) then + ! + allocate(mat_t(maxcontr,maxcontr),stat=ierr) + call ArrayStart('PThamiltonian_contract: mat_t',ierr,1,kind(mat_t),rootsize2_) + ! + call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:5)/='g_rot') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' + end if + ! + do k1 = 1,3 + do k2 = 1,3 + ! + call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! + enddo + enddo + ! + call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:5)/='g_cor') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' + end if + ! + !do k1 = 1,PT%Nmodes + do k2 = 1,3 + ! + call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! + enddo + !enddo + ! + deallocate(mat_t) + call ArrayStop('mat_t') + ! + endif + ! + if (proc_rank .eq. 0 .and. job%verbose>=6) write(out,"(' rootsize_,rootsize = ',2i)") rootsize_,rootsize + ! + call MPI_File_read_all(fileh, readbuf, 4, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:4)/='hvib') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': hvib is missing ',a)") job%kinetmat_file,readbuf(1:4) + if (proc_rank .eq. 0 .and. readbuf(1:4)=='g_ro') then + write (out,"(' Most likely non-divided chk-points used with MATELEM READ SPLIT')") + write (out,"(' Re-do MATELEM SAVE SPLIT or use MATELEM SPLIT READ')") + endif + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - in file - hvib or End missing' + end if + ! + allocate(mat_(maxcontr,maxcontr),stat=ierr) + call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + ! + allocate(hvib%me(maxcontr,maxcontr),stat=ierr) + call ArrayStart('hvib-matrix',ierr,1,kind(f_t),rootsize2_) + ! + call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! + hvib%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + ! + deallocate(mat_) + call ArrayStop('PThamiltonian_contract: mat_') + ! + call MPI_File_read_all(fileh, readbuf, 16, mpi_character, mpi_status_ignore, ierr) + if (readbuf(1:16)/='End Kinetic part') then + if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,readbuf(1:16) + call mpi_barrier(mpi_comm_world, ierr) + stop 'PTrestore_rot_kinetic_matrix_elements - bogus file format' + end if + ! + call close_chkptfile_mpi(fileh) + ! + if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + ! + !!case('top-icontr') + !! ! + !! nprocs = 1 + !! tid = 0 + !! !$omp parallel private(tid) + !! tid = omp_get_thread_num() + !! if (tid==0) then + !! nprocs = omp_get_num_threads() + !! endif + !! !$omp end parallel + !! ! + !! if (FLrotation.and.jrot/=0) then + !! ! + !! allocate(grot(3,3),stat=ierr) + !! ! + !! rootsize2_ = int(maxcontr,hik)*int(PT%max_deg_size,hik)*9_hik*int(nprocs,hik) + !! call ArrayStart('PThamiltonian_contract: grot',ierr,1,rk,rootsize2_) + !! ! + !! allocate(gcor(3),stat=ierr) + !! rootsize2_ = int(maxcontr,hik)*int(PT%max_deg_size,hik)*int(PT%Nmodes,hik)*3_hik*int(nprocs,hik) + !! call ArrayStart('PThamiltonian_contract: grot',ierr,1,rk,rootsize2_) + !! ! + !! if (job%vib_rot_contr) then + !! ! + !! do islice = 1,9+3*PT%Nmodes + !! call divided_slice_open_vib_rot(islice,job%matelem_suffix) + !! enddo + !! ! + !! endif + !! ! + !! endif + !! ! + !!case('rot-icontr') ! rotational part for the vib-rot contraction scheme + !! ! + !! ! Read the rotational part only + !! ! + !! if (.not.job%IOmatelem_split ) then + !! ! + !! write (out,"('PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with MATELEM SAVE SPLIT only ')") + !! stop 'PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with SPLIT only' + !! ! + !! endif + !! ! + !! if (.not.job%vib_rot_contr) return + !! ! + !! icontr1 = PT%Ncontr02icase0(icontr,1) + !! icontr2 = PT%Ncontr02icase0(icontr,2) + !! ! + !! islice = 0 + !! ! + !! do k1 = 1,3 + !! do k2 = 1,3 + !! ! + !! islice = islice + 1 + !! ! + !! nullify(grot(k1,k2)%me) + !! ! + !! if (associated(grot(k1,k2)%me)) deallocate(grot(k1,k2)%me) + !! ! + !! allocate(grot(k1,k2)%me(maxcontr,icontr1:icontr2),stat=ierr) + !! ! + !! write(job_is,"('single swap_matrix #',i8)") islice + !! call IOStart(trim(job_is),chkptIO_) + !! ! + !! read(chkptIO_) grot(k1,k2)%me + !! ! + !! enddo + !! enddo + !! ! + !!case('cor-icontr') ! corriolis part for the vib-rot contraction scheme + !! ! + !! ! Read the Corriolis part only + !! ! + !! ! + !! if (.not.job%IOmatelem_split ) then + !! ! + !! write (out,"('PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with MATELEM SAVE SPLIT only ')") + !! stop 'PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with SPLIT only' + !! ! + !! endif + !! ! + !! if (.not.job%vib_rot_contr) then + !! ! + !! write (out,"('PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with cor-icontr only ')") + !! stop 'PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with cor-icontr only' + !! ! + !! endif + !! ! + !! icontr1 = PT%Ncontr02icase0(icontr,1) + !! icontr2 = PT%Ncontr02icase0(icontr,2) + !! ! + !! islice = 9 + !! ! + !! do k1 = 1,3 + !! ! + !! islice = islice + 1 + !! ! + !! nullify(gcor(k1)%me) + !! ! + !! if (associated(gcor(k1)%me)) deallocate(gcor(k1)%me) + !! ! + !! allocate(gcor(k1)%me(maxcontr,icontr1:icontr2),stat=ierr) + !! ! + !! write(job_is,"('single swap_matrix #',i8)") islice + !! call IOStart(trim(job_is),chkptIO_) + !! ! + !! read(chkptIO_) gcor(k1)%me + !! ! + !! enddo + !! ! + !!case('vib-icontr') ! vibrational part for the vib-rot contraction scheme + !! ! + !! ! + !! !if (job%verbose>=4.and.irow==0) write(out,"(' Read and process vibrational part...')") + !! ! + !! if (.not.job%IOmatelem_split.and.( (.not.FLrotation.or.jrot==0).and.trim(job%IOkinet_action)/='VIB_READ' ) ) then + !! ! + !! if (job%vib_rot_contr) then + !! write (out,"('PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with MATELEM SAVE SPLIT only ')") + !! stop 'PTrestore_rot_kinetic_matrix_elements: vib-rot can be used with SPLIT only' + !! endif + !! ! + !! endif + !! ! + !! if (icontr==1) then + !! ! + !! read(chkptIO) readbuf(1:4) + !! if (readbuf(1:4)/='hvib') then + !! write (out,"(' Vib. kinetic checkpoint file ',a,': hvib is missing ',a)") job%kinetmat_file,readbuf(1:4) + !! if (readbuf(1:4)=='g_ro') write (out,"(' Most likely non-divided chk-points used with MATELEM READ DIVIDED')") + !! write (out,"(' Re-do MATELEM SAVE DIVIDE or use MATELEM READ!')") + !! stop 'PTrestore_rot_kinetic_matrix_elements - in file - hvib or End missing' + !! end if + !! ! + !! endif + !! ! + !! if (associated(hvib%me)) deallocate(hvib%me) + !! ! + !! call ArrayStart('hvib-matrix',0,1,4) + !! call ArrayStart('grot-matrix',0,1,4) + !! call ArrayStart('gcor-matrix',0,1,4) + !! call ArrayStop('hvib-matrix') + !! call ArrayStop('grot-matrix') + !! call ArrayStop('gcor-matrix') + !! ! + !! icontr1 = PT%Ncontr02icase0(icontr,1) + !! icontr2 = PT%Ncontr02icase0(icontr,2) + !! ! + !! if (job%verbose>=6) write(out,"('allocate hvib for ',i9,' x ',i8,' -> ',i8)") maxcontr,icontr1,icontr2 + !! ! + !! allocate(hvib%me(maxcontr,icontr1:icontr2),stat=ierr) + !! call ArrayStart('hvib-matrix',ierr,1,kind(f_t),rootsize2_) + !! ! + !! read(chkptIO) hvib%me + !! ! + !! maxcontr0 = size(PT%Ncontr02icase0,dim=1) + !! ! + !! if (icontr==maxcontr0) then + !! ! + !! read(chkptIO) readbuf(1:4) + !! if (readbuf(1:4)/='hvib') then + !! write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,readbuf(1:16) + !! stop 'PTrestore_rot_kinetic_matrix_elements - bogus file format' + !! end if + !! ! + !! close(chkptIO,status='keep') + !! ! + !! if (job%verbose>=4) write(out,"(' ...done!')") + !! ! + !! endif + !! ! + end select + end subroutine + + subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) + use mpi_f08 + use coarray_aux + integer(ik),intent(in) :: islice + type(MPI_File),intent(inout) :: fileh + character(len=*),intent(in) :: chkpt_type,suffix + + integer(ik) :: ilen + character(len=cl) :: readbuf,filename,islice_c + integer :: ierr + + write(islice_c, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(islice_c))//'.chk' + ! + call open_chkptfile_mpi(fileh, filename, 'read') + + ilen = LEN_TRIM(chkpt_type) + + call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then + if (proc_rank .eq. 0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) + stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' + end if + end subroutine divided_slice_open_mpi + + subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) + use mpi_f08 + use coarray_aux + integer(ik),intent(in) :: islice + type(MPI_File),intent(inout) :: fileh + character(len=*),intent(in) :: chkpt_type + + integer(ik) :: ilen + character(len=cl) :: readbuf + integer :: ierr + + if (.not.job%IOmatelem_split) return + ! + ilen = LEN_TRIM(chkpt_type) + ! + call MPI_File_read_all(fileh, readbuf, ilen, mpi_character, mpi_status_ignore, ierr) + if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then + if(proc_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice: footer is missing or wrong',a)") readbuf(1:ilen) + stop 'divided_slice_close - in slice - footer missing or wrong' + end if + ! + call close_chkptfile_mpi(fileh) + end subroutine divided_slice_close_mpi + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !!!!!!!!! MPIIO !!!!!!!!!!!!!! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! Here we restore the vibrational (J=0) matrix elements of the rotational kinetic part G_rot and G_cor ! @@ -14652,6 +15288,7 @@ subroutine PTcontracted_matelem_class(jrot) if (proc_rank.eq.0) then !AT call TimerStart('mpiiosingle') !AT + call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) ! ! store the bookkeeping information about the contr. basis set @@ -14838,7 +15475,10 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! !POSIXIO!write(chkptIO) 'g_rot' - if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + if(proc_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + endif ! endif ! @@ -14904,6 +15544,7 @@ subroutine PTcontracted_matelem_class(jrot) !POSIXIO! endif !POSIXIO!endif call co_write_matrix_distr(grot_t,mdimen, startdim, enddim,chkptMPIIO) + if (proc_rank.eq.0) write(*,*) grot_t(1,1), grot_t(6533,1) ! enddo enddo @@ -14913,7 +15554,10 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! !POSIXIO!write(chkptIO) 'g_cor' - if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + if(proc_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + endif ! endif ! @@ -15320,7 +15964,10 @@ subroutine PTcontracted_matelem_class(jrot) ! !POSIXIO!write(chkptIO) 'hvib' !POSIXIO!write(chkptIO) hvib%me - if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) + if(proc_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) + endif call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) ! endif @@ -15335,7 +15982,10 @@ subroutine PTcontracted_matelem_class(jrot) ! !POSIXIO!write(chkptIO) 'End Kinetic part' !POSIXIO!close(chkptIO,status='keep') - if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) + if(proc_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) + endif call MPI_File_close(chkptMPIIO, ierr) ! endif From 763a291e6663fdd9bdab719af9c08dd7a6cad4e0 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 22 Feb 2019 16:13:02 +0000 Subject: [PATCH 07/79] coarray_aux => mpi_aux -- no longer using coarrays --- makefile | 6 +- coarray_aux.f90 => mpi_aux.f90 | 26 ++++---- perturbation.f90 | 109 ++++++++++++++++----------------- trove.f90 | 6 +- 4 files changed, 74 insertions(+), 73 deletions(-) rename coarray_aux.f90 => mpi_aux.f90 (95%) diff --git a/makefile b/makefile index ac5ed83..900e8e3 100644 --- a/makefile +++ b/makefile @@ -34,11 +34,11 @@ LIB = $(LAPACK) trove.x: trove.o accuracy.o perturbation.o fields.o symmetry.o molecules.o me_numer.o me_str.o me_bnd.o me_rot.o \ lapack.o plasma.o moltype.o refinement.o dipole.o refinement.o tran.o diag.o timer.o input.o \ mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ - pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o coarray_aux.o $(pot_user).o + pot_xy2.o pot_xy3.o pot_xy4.o pot_zxy2.o pot_zxy3.o pot_ch3oh.o pot_abcd.o pot_c2h4.o pot_c2h6.o pot_c3h6.o mpi_aux.o $(pot_user).o $(FOR) $(FFLAGS) -o j-trove$(PLAT).x $^ $(LIB) trove.o: accuracy.o fields.o perturbation.o symmetry.o timer.o moltype.o dipole.o refinement.o tran.o -perturbation.o: accuracy.o molecules.o lapack.o fields.o timer.o symmetry.o diag.o plasma.o coarray_aux.o +perturbation.o: accuracy.o molecules.o lapack.o fields.o timer.o symmetry.o diag.o plasma.o mpi_aux.o fields.o: accuracy.o molecules.o lapack.o me_str.o timer.o me_numer.o input.o me_rot.o moltype.o symmetry.o me_bnd.o symmetry.o: accuracy.o molecules.o: accuracy.o moltype.o mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o \ @@ -83,7 +83,7 @@ pot_c2h4.o: accuracy.o moltype.o pot_c3h6.o: accuracy.o moltype.o pot_abcd.o: accuracy.o moltype.o lapack.o -coarray_aux.o: timer.o +mpi_aux.o: timer.o clean: rm -f *.mod *.o diff --git a/coarray_aux.f90 b/mpi_aux.f90 similarity index 95% rename from coarray_aux.f90 rename to mpi_aux.f90 index d92c4c0..4be132a 100644 --- a/coarray_aux.f90 +++ b/mpi_aux.f90 @@ -1,4 +1,4 @@ -module coarray_aux +module mpi_aux use mpi_f08 use timer use accuracy @@ -7,7 +7,7 @@ module coarray_aux public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr public co_create_type - public send_or_recv, comm_size, proc_rank + public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim interface co_sum @@ -24,7 +24,7 @@ module coarray_aux end interface integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv - integer :: comm_size, proc_rank + integer :: comm_size, mpi_rank integer :: co_startdim, co_enddim logical :: comms_inited = .false., distr_inited=.false. type(MPI_Datatype) :: mpitype_column @@ -50,7 +50,7 @@ subroutine co_sum_double(x, result_image) !end if !sync all - if (proc_rank .eq. 0) then + if (mpi_rank .eq. 0) then call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) else call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) @@ -113,7 +113,7 @@ subroutine co_gatherv_double(x) if (comm_size .eq. 1) return call TimerStart('CO_GATHERV_DOUBLE') - if (proc_rank.eq.0) then + if (mpi_rank.eq.0) then call mpi_gatherv(x, 0, mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) else call mpi_gatherv(x, size(x), mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) @@ -123,7 +123,6 @@ subroutine co_gatherv_double(x) end subroutine co_gatherv_double - subroutine co_init_comms() integer :: ierr @@ -131,7 +130,7 @@ subroutine co_init_comms() if (ierr .gt. 0) stop "MPI_INIT" call mpi_comm_size(mpi_comm_world, comm_size, ierr) if (ierr .gt. 0) stop "MPI_COMM_SIZE" - call mpi_comm_rank(mpi_comm_world, proc_rank, ierr) + call mpi_comm_rank(mpi_comm_world, mpi_rank, ierr) if (ierr .gt. 0) stop "MPI_COMM_RANK" comms_inited = .true. @@ -146,6 +145,7 @@ subroutine co_finalize_comms() call mpi_finalize(ierr) if (ierr .gt. 0) stop "MPI_FINALIZE" + end subroutine co_finalize_comms subroutine co_init_distr(dimen, startdim, enddim, blocksize) @@ -158,7 +158,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) if (.not. comms_inited) stop "COMMS NOT INITIALISED" if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" - proc_index = proc_rank+1 + proc_index = mpi_rank+1 allocate(proc_sizes(comm_size),proc_offsets(comm_size),send_or_recv(comm_size),starts(comm_size),ends(comm_size),stat=ierr) if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" @@ -170,7 +170,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) send_or_recv(1) = 0 else - if (proc_rank .eq. 0) then !root + if (mpi_rank .eq. 0) then !root localsize = dimen/comm_size localsize_ = int(1+real(dimen/comm_size)) @@ -245,6 +245,7 @@ end subroutine co_init_distr subroutine co_distr_data(x, tmp, blocksize, lb, ub) use accuracy + implicit none real(rk),dimension(:,lb:),intent(inout) :: x real(rk),dimension(:,:,:),intent(inout) :: tmp @@ -259,6 +260,7 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) do i=1,comm_size reqs(i)= MPI_REQUEST_NULL end do + do i=1,comm_size if (send_or_recv(i).eq.1) then call mpi_isend(x,1,mpi_blocktype(i),i-1,0,mpi_comm_world,reqs(i),ierr) @@ -303,7 +305,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call TimerStart('MPI_write') - !if (proc_rank .eq. 0) then + !if (mpi_rank .eq. 0) then call mpi_file_get_size(outfile,mpioffset,ierr) !endif @@ -311,10 +313,10 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - mpioffset = mpioffset + proc_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) + mpioffset = mpioffset + mpi_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) mpi_write_offsetkind = int(1+real(longdim/comm_size),MPI_Offset_kind) - if (proc_rank .lt. (comm_size-1)) then + if (mpi_rank .lt. (comm_size-1)) then writecount = int(1+real(longdim/comm_size)) else writecount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) diff --git a/perturbation.f90 b/perturbation.f90 index b1b366f..0140c30 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6508,7 +6508,7 @@ end subroutine PTsymmetrization ! Here we construct the Hamiltonian matrix in the contracted basis set representation ! subroutine PThamiltonian_contract(jrot) - use coarray_aux + use mpi_aux integer(ik),intent(in) :: jrot ! rotational quantum number ! @@ -6570,7 +6570,7 @@ subroutine PThamiltonian_contract(jrot) ! Prepare the storing information if necessary: ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then - if (proc_rank.eq.0) call check_point_active_space(job%IOeigen_action) + if (mpi_rank.eq.0) call check_point_active_space(job%IOeigen_action) endif ! ! obtain zpe @@ -6696,7 +6696,7 @@ subroutine PThamiltonian_contract(jrot) ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then ! - if (proc_rank.eq.0) call check_point_active_space('CLOSE') + if (mpi_rank.eq.0) call check_point_active_space('CLOSE') ! endif ! @@ -7292,7 +7292,7 @@ subroutine PThamiltonian_contract(jrot) call TimerStop('Calculating the Hamiltonian matrix') ! if (job%verbose>=4) write(out,"('...done!')") - if (proc_rank.eq.0) then!mpiio + if (mpi_rank.eq.0) then!mpiio ! Correction for the case we do not compute the vibrational part of the ! Hamiltonian: ! @@ -7633,7 +7633,7 @@ subroutine PThamiltonian_contract(jrot) ! if ( trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND' ) then ! - if(proc_rank.eq.0) call check_point_active_space('CLOSE') + if(mpi_rank.eq.0) call check_point_active_space('CLOSE') ! endif ! @@ -7670,7 +7670,7 @@ end subroutine PThamiltonian_contract subroutine open_chkptfile_mpi(fileh, filename, mode) use mpi_f08 - use coarray_aux + use mpi_aux type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: filename, mode @@ -7686,7 +7686,7 @@ subroutine open_chkptfile_mpi(fileh, filename, mode) call MPI_File_Open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) if (ierr) then - if (proc_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file." + if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file." stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error opening MATELEM MPI-IO input file" endif @@ -7696,14 +7696,14 @@ end subroutine open_chkptfile_mpi subroutine close_chkptfile_mpi(fileh) use mpi_f08 - use coarray_aux + use mpi_aux type(MPI_File), intent(inout) :: fileh integer :: ierr call mpi_file_close(fileh, ierr) if (ierr) then - if (proc_rank .eq. 0) write(*,*) "Error closing MPI-IO-formatted Vib. kinetic checkpoint file." + if (mpi_rank .eq. 0) write(*,*) "Error closing MPI-IO-formatted Vib. kinetic checkpoint file." stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error closing MATELEM MPI-IO input file" endif end subroutine close_chkptfile_mpi @@ -7712,7 +7712,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & PT, PTvibrational_me_calc,grot,gcor,hvib, & ncontr, maxcontr, icontr) use mpi_f08 - use coarray_aux + use mpi_aux integer(ik),intent(in) :: jrot character(len=cl),intent(in) :: task type(MPI_File),intent(inout) :: fileh @@ -7742,9 +7742,9 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & if ( trim(job%IOswap_matelem)/='NONE') return - if (proc_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i)") icontr + if (mpi_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i)") icontr - if (proc_rank .eq. 0) write(*,*) "DEBUG|TASK =", task + if (mpi_rank .eq. 0) write(*,*) "DEBUG|TASK =", task select case (trim(task)) case('top') @@ -7762,7 +7762,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 7, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:7) /= '[MPIIO]') then - if (proc_rank .eq. 0) write(*,*) "Invalid MPIIO identifier to MPI-IO-formatted Vib. kinetic checkpoint file." + if (mpi_rank .eq. 0) write(*,*) "Invalid MPIIO identifier to MPI-IO-formatted Vib. kinetic checkpoint file." call mpi_barrier(mpi_comm_world, ierr) stop "MPI_PTrestore_rot_kinetic_matrix_elements - bogus file format header" endif @@ -7770,7 +7770,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 18, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:18) /= 'Start Kinetic part') then - if (proc_rank .eq. 0) write(*,*) "Invalid header to MPI-IO-formatted Vib. kinetic checkpoint file." + if (mpi_rank .eq. 0) write(*,*) "Invalid header to MPI-IO-formatted Vib. kinetic checkpoint file." call mpi_barrier(mpi_comm_world, ierr) stop "MPI_PTrestore_rot_kinetic_matrix_elements - bogus file format" endif @@ -7778,8 +7778,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, ncontr, 1, mpi_integer, mpi_status_ignore, ierr) if (jrot==0.and.PT%Maxcontracts/=ncontr) then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file - if (proc_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i)") PT%Maxcontracts,ncontr + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file + if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i)") PT%Maxcontracts,ncontr call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal nroots ' end if @@ -7787,7 +7787,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & rootsize = int(ncontr*(ncontr+1)/2,hik) rootsize2 = int(ncontr*ncontr,hik) - if (proc_rank .eq. 0.and.job%verbose>=6) write(out,"(/'Restore_rot_kin...: Number of elements: ',i8)") PT%Maxcontracts + if (mpi_rank .eq. 0.and.job%verbose>=6) write(out,"(/'Restore_rot_kin...: Number of elements: ',i8)") PT%Maxcontracts ! Read the indexes of the J=0 contracted basis set. @@ -7798,7 +7798,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:10)/='icontr_cnu') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if @@ -7807,7 +7807,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:11)/='icontr_ideg') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if @@ -7830,7 +7830,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:10)/='icontr_cnu') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if @@ -7839,7 +7839,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 11, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:11)/='icontr_ideg') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if @@ -7852,7 +7852,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & call MPI_File_read_all(fileh, readbuf, 7, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:7)/='vib-rot') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': label vib-rot is missing ',a)") job%kinetmat_file,readbuf(1:7) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': label vib-rot is missing ',a)") job%kinetmat_file,readbuf(1:7) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - vib-rot missing' end if @@ -7874,20 +7874,20 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !TODO_FOREIGNFUNCTION! call Find_groundstate_icontr(maxcontr) - if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") - if (proc_rank .eq. 0 .and. job%verbose>=4.and.maxcontr/=ncontr) then + if (mpi_rank .eq. 0 .and. job%verbose>=4.and.maxcontr/=ncontr) then write (out,"(' The contracted basis set is reduced: ',i,' -> ',i)") ncontr,maxcontr end if if (maxcontr>ncontr) then - if (proc_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree (maxcontr,ncontr) ',2i8)") maxcontr,ncontr + if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree (maxcontr,ncontr) ',2i8)") maxcontr,ncontr call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal ncontr ' end if case('rot') ! - if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process rotational part...')") + if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process rotational part...')") ! ! Read the rotational part only ! @@ -7898,7 +7898,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_rot') then - if(proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) + if(mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if @@ -7936,7 +7936,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! case('cor') ! - if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process Coriolis part...')") + if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process Coriolis part...')") ! allocate(mat_(maxcontr,maxcontr),stat=ierr) call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) @@ -7945,7 +7945,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_cor') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if @@ -7976,7 +7976,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & deallocate(mat_) call ArrayStop('PThamiltonian_contract: mat_') ! - if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") ! case('vib') ! @@ -7989,7 +7989,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_rot') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if @@ -8004,7 +8004,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_cor') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if @@ -8022,12 +8022,12 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! endif ! - if (proc_rank .eq. 0 .and. job%verbose>=6) write(out,"(' rootsize_,rootsize = ',2i)") rootsize_,rootsize + if (mpi_rank .eq. 0 .and. job%verbose>=6) write(out,"(' rootsize_,rootsize = ',2i)") rootsize_,rootsize ! call MPI_File_read_all(fileh, readbuf, 4, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:4)/='hvib') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': hvib is missing ',a)") job%kinetmat_file,readbuf(1:4) - if (proc_rank .eq. 0 .and. readbuf(1:4)=='g_ro') then + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': hvib is missing ',a)") job%kinetmat_file,readbuf(1:4) + if (mpi_rank .eq. 0 .and. readbuf(1:4)=='g_ro') then write (out,"(' Most likely non-divided chk-points used with MATELEM READ SPLIT')") write (out,"(' Re-do MATELEM SAVE SPLIT or use MATELEM SPLIT READ')") endif @@ -8050,14 +8050,14 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call MPI_File_read_all(fileh, readbuf, 16, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:16)/='End Kinetic part') then - if (proc_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,readbuf(1:16) + if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,readbuf(1:16) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - bogus file format' end if ! call close_chkptfile_mpi(fileh) ! - if (proc_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") + if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") ! !!case('top-icontr') !! ! @@ -8235,7 +8235,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) use mpi_f08 - use coarray_aux + use mpi_aux integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: chkpt_type,suffix @@ -8254,14 +8254,14 @@ subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then - if (proc_rank .eq. 0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) + if (mpi_rank .eq. 0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' end if end subroutine divided_slice_open_mpi subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) use mpi_f08 - use coarray_aux + use mpi_aux integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: chkpt_type @@ -8276,7 +8276,7 @@ subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) ! call MPI_File_read_all(fileh, readbuf, ilen, mpi_character, mpi_status_ignore, ierr) if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then - if(proc_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice: footer is missing or wrong',a)") readbuf(1:ilen) + if(mpi_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice: footer is missing or wrong',a)") readbuf(1:ilen) stop 'divided_slice_close - in slice - footer missing or wrong' end if ! @@ -8917,7 +8917,7 @@ end subroutine PTrestore_rot_kinetic_matrix_elements ! for the K-factorized rotational basis ! recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_diagonalization) - use coarray_aux + use mpi_aux integer(ik),intent(in) :: jrot,irow,ijterm(:,:) real(rk),external :: func @@ -15129,7 +15129,7 @@ end subroutine matrix_transform ! Contracted matrix elements ! subroutine PTcontracted_matelem_class(jrot) - use coarray_aux + use mpi_aux ! implicit none ! @@ -15239,11 +15239,11 @@ subroutine PTcontracted_matelem_class(jrot) rootsize = int(mdimen*mdimen,hik) call co_init_distr(mdimen, startdim, enddim, blocksize_) allocate(reqs(comm_size)) - write(*,*) "SENDRECV:", proc_rank, send_or_recv + write(*,*) "SENDRECV:", mpi_rank, send_or_recv mdimen_p = int(1+real(mdimen/comm_size)) mdimen_b = comm_size*mdimen_p - write(*,*) "DIMS", proc_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p + write(*,*) "DIMS", mpi_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p blocksize = blocksize_ ! ! The vibrational (J=0) matrix elements of the rotational and coriolis @@ -15285,7 +15285,7 @@ subroutine PTcontracted_matelem_class(jrot) call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) mpioffset=0 call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) - if (proc_rank.eq.0) then !AT + if (mpi_rank.eq.0) then !AT call TimerStart('mpiiosingle') !AT call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) @@ -15475,7 +15475,7 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! !POSIXIO!write(chkptIO) 'g_rot' - if(proc_rank.eq.0) then + if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) endif @@ -15544,7 +15544,6 @@ subroutine PTcontracted_matelem_class(jrot) !POSIXIO! endif !POSIXIO!endif call co_write_matrix_distr(grot_t,mdimen, startdim, enddim,chkptMPIIO) - if (proc_rank.eq.0) write(*,*) grot_t(1,1), grot_t(6533,1) ! enddo enddo @@ -15554,7 +15553,7 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! !POSIXIO!write(chkptIO) 'g_cor' - if(proc_rank.eq.0) then + if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) endif @@ -15964,7 +15963,7 @@ subroutine PTcontracted_matelem_class(jrot) ! !POSIXIO!write(chkptIO) 'hvib' !POSIXIO!write(chkptIO) hvib%me - if(proc_rank.eq.0) then + if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) endif @@ -15982,7 +15981,7 @@ subroutine PTcontracted_matelem_class(jrot) ! !POSIXIO!write(chkptIO) 'End Kinetic part' !POSIXIO!close(chkptIO,status='keep') - if(proc_rank.eq.0) then + if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) endif @@ -16023,7 +16022,7 @@ subroutine PTcontracted_matelem_class(jrot) call MPI_File_open(mpi_comm_world, 'mpiioEXTfile', mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) mpioffset=0 call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) - if (proc_rank.eq.0) then !AT + if (mpi_rank.eq.0) then !AT call TimerStart('mpiiosingle') !AT call MPI_File_write(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) @@ -16091,7 +16090,7 @@ subroutine PTcontracted_matelem_class(jrot) !POSIXIO! write(chkptIO) extF_t !POSIXIO! ! !POSIXIO!endif - if(proc_rank.eq.0) call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) + if(mpi_rank.eq.0) call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) ! if (job%verbose>=4) write(out,"('...done')",advance='YES') @@ -16134,7 +16133,7 @@ subroutine PTcontracted_matelem_class(jrot) endif ! !POSIXIO!if (.not.job%IOextF_divide) write(chkptIO) 'End external field' - if (proc_rank.eq.0) then !AT + if (mpi_rank.eq.0) then !AT if(.not.job%IOextF_divide) call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) endif call MPI_File_close(chkptMPIIO, ierr) @@ -33664,7 +33663,7 @@ subroutine PTstore_icontr_cnu(Maxcontracts,iunit,dir) end subroutine PTstore_icontr_cnu subroutine ptstorempi_icontr_cnu(maxcontracts,iunit,dir) - use coarray_aux + use mpi_aux integer(ik),intent(in) :: maxcontracts type(mpi_file),intent(in) :: iunit diff --git a/trove.f90 b/trove.f90 index 6c7305e..b1e9b7d 100644 --- a/trove.f90 +++ b/trove.f90 @@ -13,7 +13,7 @@ module tp_module use dipole, only: dm_tranint,dm_analysis_density use refinement, only : refinement_by_fit,external_expectation_values use tran, only : TRconvert_matel_j0_eigen,TRconvert_repres_J0_to_contr - use coarray_aux, only: proc_rank,co_init_comms,co_finalize_comms + use mpi_aux, only: mpi_rank,co_init_comms,co_finalize_comms implicit none @@ -188,7 +188,7 @@ subroutine ptmain call co_init_comms() if (job%contrci_me_fast) then ! - if (proc_rank.eq.0) then + if (mpi_rank.eq.0) then call PTstore_contr_matelem(j) endif ! @@ -204,7 +204,7 @@ subroutine ptmain ! ! convert to j=0 representation as part of the first step j=0 calculation ! - if (proc_rank.eq.0 .and. job%convert_model_j0) then + if (mpi_rank.eq.0 .and. job%convert_model_j0) then ! call TRconvert_repres_J0_to_contr(j) call TRconvert_matel_j0_eigen(j) From ac0fd7057ee5f06fbb7280a8d535f7e663071073 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 26 Apr 2019 15:40:03 +0100 Subject: [PATCH 08/79] Work around slow MPI-IO write on some systems, implement parallel writing of split files, reading of MPI-IO formatted files, some pblas experiments --- makefile | 4 +- mpi_aux.f90 | 80 ++++- perturbation.f90 | 738 +++++++++++++++++++++++++++++++---------------- tran.f90 | 384 ++++++++++++++++++++++-- 4 files changed, 918 insertions(+), 288 deletions(-) diff --git a/makefile b/makefile index 900e8e3..bbcd01d 100644 --- a/makefile +++ b/makefile @@ -13,14 +13,14 @@ pot_user = pot_ch4 PLAT = _2205_i17 ###FOR = ifort FOR = mpif90 -FFLAGS = -qopenmp -xcore-avx2 -O3 -ip -g3 +FFLAGS = -qopenmp -xcore-avx2 -O3 -ip #FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a #LAPACK = -L/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-5.4.0/openblas-0.2.20-gbzlk5wei7fsojje2fiwj7w5wssikb73/lib -lopenblas -LAPACK = -mkl +LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 #LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 4be132a..70dae59 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -4,12 +4,15 @@ module mpi_aux use accuracy implicit none - public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr + public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr, co_read_matrix_distr public co_create_type public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim + public blacs_size, blacs_rank, blacs_ctxt + public nprow,npcol,myprow,mypcol, desca,descb,descc + interface co_sum module procedure :: co_sum_double end interface @@ -30,14 +33,34 @@ module mpi_aux type(MPI_Datatype) :: mpitype_column type(MPI_Datatype),dimension(:), allocatable :: mpi_blocktype + !blacs/pblas + integer :: blacs_size, blacs_rank, blacs_ctxt + integer :: nprow,npcol,myprow,mypcol + integer :: desca(9) + integer :: descb(9) + integer :: descc(9) + integer :: descd(9) + contains + subroutine co_init_pblas() + call blacs_pinfo(blacs_rank, blacs_size) + if (blacs_rank .lt. 0) return + + call blacs_get(-1, 0, blacs_ctxt) + call blacs_gridinit(blacs_ctxt, 'R', blacs_size/8, 8) + call blacs_gridinfo(blacs_ctxt, nprow, npcol, myprow, mypcol) + + write(*,"('BLACS: [',i2,',',i2'](',i4,i4,i4,i4',)')") mpi_rank,blacs_rank,nprow,npcol,myprow,mypcol + end subroutine co_init_pblas + subroutine co_sum_double(x, result_image) real*8, intent(inout), dimension(:,:) :: x integer, optional :: result_image integer :: i !integer, save :: result_image_mpi[*] + if (comm_size.eq.1) return call TimerStart('co_sum_double') !if (present(result_image)) then @@ -237,8 +260,11 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) endif + call co_create_type(dimen) deallocate(starts,ends) + call co_init_pblas() + distr_inited = .true. end subroutine co_init_distr @@ -292,37 +318,65 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) end subroutine co_distr_data + subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) + use accuracy + + real(rk),dimension(:,lb:),intent(in) :: x + integer,intent(in) :: longdim, lb, ub + + type(MPI_File),intent(in) :: infile + type(MPI_Status) :: writestat + integer(kind=MPI_Offset_kind) :: offset_start,offset_end + integer :: readcount, mpi_real_size, ierr + + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + + if (mpi_rank .lt. (comm_size-1)) then + readcount = int(1+real(longdim/comm_size)) + else + readcount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) + endif + + offset_start = mpi_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) + offset_end = longdim + offset_end = (offset_end * offset_end * mpi_real_size) - offset_start + + call MPI_File_seek(infile, offset_start, MPI_SEEK_CUR) + call MPI_File_read_all(infile,x,readcount,mpitype_column,writestat,ierr) + call MPI_File_seek(infile, offset_end, MPI_SEEK_CUR) + + end subroutine co_read_matrix_distr + subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) use accuracy real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub type(MPI_File),intent(in) :: outfile - integer :: ierr, mpi_real_size, writecount - integer(kind=MPI_Offset_kind) :: mpioffset,mpi_write_offsetkind + integer :: ierr, mpi_real_size, writecount, mpi_col_size + !integer(kind=MPI_Offset_kind) :: mpioffset,mpi_write_offsetkind + integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat + call mpi_barrier(mpi_comm_world, ierr) call TimerStart('MPI_write') - !if (mpi_rank .eq. 0) then - call mpi_file_get_size(outfile,mpioffset,ierr) - !endif - - !call mpi_bcast(mpioffset,1,mpi_integer,0,mpi_comm_world,ierr) - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + call MPI_Type_size(mpitype_column, mpi_col_size,ierr) - mpioffset = mpioffset + mpi_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) - - mpi_write_offsetkind = int(1+real(longdim/comm_size),MPI_Offset_kind) if (mpi_rank .lt. (comm_size-1)) then writecount = int(1+real(longdim/comm_size)) else writecount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) endif - call MPI_File_write_at_all(outfile,mpioffset,x,writecount,mpitype_column,writestat,ierr) + offset_start = mpi_rank * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_col_size + offset_end = 0 + + call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) + call MPI_File_write_all(outfile,x,writecount,mpitype_column,writestat,ierr) + call MPI_File_seek(outfile, offset_end, MPI_SEEK_END) call TimerStop('MPI_write') end subroutine co_write_matrix_distr diff --git a/perturbation.f90 b/perturbation.f90 index 0140c30..a6f43b2 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -23,7 +23,7 @@ module perturbation public PTcontracted_matelem_class,PTeigenfunction_orthogonality public PThamiltonian_contract,PTget_primitive_matelements,PTDVR_initialize public PTcheck_point_contracted_space,PT_conctracted_rotational_bset - public PTTest_eigensolution,PTanalysis_density,PTstore_icontr_cnu + public PTTest_eigensolution,PTanalysis_density,PTstore_icontr_cnu,PTstorempi_icontr_cnu public PTintcoeffsT,PTrotquantaT,PTNclasses,PTdefine_contr_from_eigenvect,PTeigenT,PTrepresT public PTstore_contr_matelem,PTcontracted_matelem_class_fast,PTstore_contr_matelem_II,PTcontracted_matelem_class_fast_II @@ -6001,7 +6001,7 @@ subroutine PTsymmetrization(j) endif ! if (job%verbose>=5) then - write(my_fmt,'(a,i0,a)') "(i7,a,",Nclasses+1,"i4,a,2x,",Nrepresen,"(f12.4))" + write(my_fmt,'(a,i0,a,i0,a)') "(i7,a,2(",Nclasses+1,"i4,a),,2x,",Nrepresen,"(f12.4))" write(out,my_fmt) icoeff,' : ',cnu(:),' isym= (',& (contr(iclasses)%eigen(cnu(iclasses))%isym,iclasses=0,PT%Nclasses),')',Nirr_rk(1:Nrepresen) endif @@ -6849,10 +6849,10 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Calculating the Hamiltonian matrix') ! task = 'top' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - ! PT, PTvibrational_me_calc,grot,gcor,hvib, & - ! ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! ! We have two calculation options: fast and cheap and slow but expensive. ! @@ -6869,15 +6869,15 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Restoring KE matrix') ! task = 'rot' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - ! PT, PTvibrational_me_calc,grot,gcor,hvib, & - ! ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) task = 'cor' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - ! PT, PTvibrational_me_calc,grot,gcor,hvib, & - ! ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! call TimerStop('Restoring KE matrix') ! @@ -6892,10 +6892,10 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Restoring KE matrix') ! task = 'vib' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - !call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - ! PT, PTvibrational_me_calc,grot,gcor,hvib, & - ! ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) call TimerStop('Restoring KE matrix') ! @@ -6977,7 +6977,10 @@ subroutine PThamiltonian_contract(jrot) if (job%verbose>=4) write(out,"(/' Construct the Hamiltonian matrix...')") ! task = 'top-icontr' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! if (job%verbose>=5) write(out,"(' N Arrays of ',f12.5,'Gb each will be allocated (N is the number of processors)')") & real(sym%Nrepresen,rk)*real(PT%max_deg_size,rk)*real(max_dim,rk)*real(rk,rk)/1024.0_rk**3 @@ -6997,16 +7000,25 @@ subroutine PThamiltonian_contract(jrot) if (FLrotation.and.jrot/=0) then ! task = 'rot-icontr' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) task = 'cor-icontr' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! endif ! if ( PTvibrational_me_calc ) then ! task = 'vib-icontr' - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! endif ! @@ -7080,7 +7092,10 @@ subroutine PThamiltonian_contract(jrot) ! task = 'rot' ! - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7146,7 +7161,10 @@ subroutine PThamiltonian_contract(jrot) ! task = 'cor' ! - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7218,7 +7236,10 @@ subroutine PThamiltonian_contract(jrot) ! task = 'vib' ! - call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + !PT, PTvibrational_me_calc,grot,gcor,hvib, & + ncontr,maxcontr) ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7685,8 +7706,8 @@ subroutine open_chkptfile_mpi(fileh, filename, mode) end select call MPI_File_Open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) - if (ierr) then - if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file." + if (ierr.gt.0) then + if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file.", filename stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error opening MATELEM MPI-IO input file" endif @@ -7702,14 +7723,14 @@ subroutine close_chkptfile_mpi(fileh) integer :: ierr call mpi_file_close(fileh, ierr) - if (ierr) then + if (ierr.gt.0) then if (mpi_rank .eq. 0) write(*,*) "Error closing MPI-IO-formatted Vib. kinetic checkpoint file." stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error closing MATELEM MPI-IO input file" endif end subroutine close_chkptfile_mpi subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & - PT, PTvibrational_me_calc,grot,gcor,hvib, & + !PT, PTvibrational_me_calc,grot,gcor,hvib, & ncontr, maxcontr, icontr) use mpi_f08 use mpi_aux @@ -7718,10 +7739,10 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & type(MPI_File),intent(inout) :: fileh integer(ik),intent(in) :: dimen - type(PTelementsT),intent(in) :: PT - logical,intent(in) :: PTvibrational_me_calc - type(PTcontrME),pointer :: grot(:,:),gcor(:) ! rot. kinetic part - type(PTcontrME) :: hvib ! rot. kinetic part + !type(PTelementsT),intent(in) :: PT + !logical,intent(in) :: PTvibrational_me_calc + !type(PTcontrME),pointer :: grot(:,:),gcor(:) ! rot. kinetic part + !type(PTcontrME) :: hvib ! rot. kinetic part integer(ik),intent(inout),optional :: ncontr integer(ik),intent(inout),optional :: maxcontr @@ -7730,7 +7751,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & type(MPI_File) :: fileh_slice character(len=cl) :: job_id,filename,readbuf integer(kind=MPI_Offset_kind) :: file_offset - integer :: ierr + integer :: ierr, mpi_real_size, mpi_int_size integer(hik) :: rootsize,rootsize_,rootsize2,rootsize2_,nprocs,tid,icontr1,icontr2 integer(ik),allocatable :: imat_t(:,:) real(rk),allocatable :: mat_t(:,:),mat_(:,:) @@ -7742,7 +7763,18 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & if ( trim(job%IOswap_matelem)/='NONE') return - if (mpi_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i)") icontr + rootsize = int(ncontr*(ncontr+1)/2,hik) + rootsize_ = int(maxcontr*(maxcontr+1)/2,hik) + ! + rootsize2 = int(ncontr*ncontr,hik) + rootsize2_ = int(maxcontr*maxcontr,hik) + ! + !dimen = max(min(int(PT%Maxcontracts*job%compress),PT%Maxcontracts),1) + + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + call MPI_Type_size(mpi_integer, mpi_real_size,ierr) + + if (mpi_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i9)") icontr if (mpi_rank .eq. 0) write(*,*) "DEBUG|TASK =", task @@ -7754,7 +7786,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !call IOStart(trim(job_id),fileh) !TODO set filename dynamically - filename = 'mpiiofile' + filename = trim(job%matelem_suffix)//'.chk' + write(*,*) "FILENAME", filename call open_chkptfile_mpi(fileh, filename, 'read') @@ -7779,7 +7812,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & if (jrot==0.and.PT%Maxcontracts/=ncontr) then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file - if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i)") PT%Maxcontracts,ncontr + if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i9)") PT%Maxcontracts,ncontr call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal nroots ' end if @@ -7793,8 +7826,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & if (.not.FLrotation.or.jrot==0) then - allocate (imat_t(0:PT%Nclasses,ncontr),stat=ierr) - call ArrayStart('mat_t',ierr,size(imat_t),kind(imat_t)) + !allocate (imat_t(0:PT%Nclasses,ncontr),stat=ierr) + !call ArrayStart('mat_t',ierr,size(imat_t),kind(imat_t)) call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:10)/='icontr_cnu') then @@ -7803,16 +7836,22 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if - call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + !call mpi_file_get_size(fileh,file_offset,ierr) + file_offset = (PT%Nclasses+1)*ncontr*mpi_int_size + call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) - call MPI_File_read_all(fileh, readbuf, 10, mpi_character, mpi_status_ignore, ierr) + !call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + + call MPI_File_read_all(fileh, readbuf, 11, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:11)/='icontr_ideg') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) call mpi_barrier(mpi_comm_world, ierr) stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if - call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + !call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) + file_offset = (PT%Nclasses+1)*ncontr*mpi_int_size + call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) deallocate(imat_t) @@ -7872,12 +7911,12 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! reconstruct the correlation between the vib. indices for J=0 and current J - !TODO_FOREIGNFUNCTION! call Find_groundstate_icontr(maxcontr) + call Find_groundstate_icontr(maxcontr) if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") if (mpi_rank .eq. 0 .and. job%verbose>=4.and.maxcontr/=ncontr) then - write (out,"(' The contracted basis set is reduced: ',i,' -> ',i)") ncontr,maxcontr + write (out,"(' The contracted basis set is reduced: ',i9,' -> ',i9)") ncontr,maxcontr end if if (maxcontr>ncontr) then @@ -8022,7 +8061,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! endif ! - if (mpi_rank .eq. 0 .and. job%verbose>=6) write(out,"(' rootsize_,rootsize = ',2i)") rootsize_,rootsize + if (mpi_rank .eq. 0 .and. job%verbose>=6) write(out,"(' rootsize_,rootsize = ',2i9)") rootsize_,rootsize ! call MPI_File_read_all(fileh, readbuf, 4, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:4)/='hvib') then @@ -8059,7 +8098,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") ! - !!case('top-icontr') + case('top-icontr') + write(*,*) "CASE TOP-ICONTR" !! ! !! nprocs = 1 !! tid = 0 @@ -8091,7 +8131,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !! ! !! endif !! ! - !!case('rot-icontr') ! rotational part for the vib-rot contraction scheme + case('rot-icontr') ! rotational part for the vib-rot contraction scheme + write(*,*) "CASE ROT-ICONTR" !! ! !! ! Read the rotational part only !! ! @@ -8128,7 +8169,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !! enddo !! enddo !! ! - !!case('cor-icontr') ! corriolis part for the vib-rot contraction scheme + case('cor-icontr') ! corriolis part for the vib-rot contraction scheme + write(*,*) "CASE COR-ICONTR" !! ! !! ! Read the Corriolis part only !! ! @@ -8169,7 +8211,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !! ! !! enddo !! ! - !!case('vib-icontr') ! vibrational part for the vib-rot contraction scheme + case('vib-icontr') ! vibrational part for the vib-rot contraction scheme + write(*,*) "CASE VIB-ICONTR" !! ! !! ! !! !if (job%verbose>=4.and.irow==0) write(out,"(' Read and process vibrational part...')") @@ -8236,23 +8279,30 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) use mpi_f08 use mpi_aux + implicit none integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: chkpt_type,suffix integer(ik) :: ilen - character(len=cl) :: readbuf,filename,islice_c + character(len=cl) :: readbuf,filename,jchar integer :: ierr - write(islice_c, '(i4)') islice + if (.not.job%IOmatelem_split) return + ! + !write(job_is,"('single swap_matrix')") ! - filename = trim(suffix)//trim(adjustl(islice_c))//'.chk' + !call IOStart(trim(job_is),chkptIO) + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' ! call open_chkptfile_mpi(fileh, filename, 'read') - + ! ilen = LEN_TRIM(chkpt_type) - - call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) + ! + call MPI_File_read_all(fileh, readbuf, ilen, mpi_character, mpi_status_ignore, ierr) if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then if (mpi_rank .eq. 0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' @@ -8284,7 +8334,7 @@ subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) end subroutine divided_slice_close_mpi !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !!!!!!!!! MPIIO !!!!!!!!!!!!!! + !!!!!!!! POSIX IO !!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! Here we restore the vibrational (J=0) matrix elements of the rotational kinetic part G_rot and G_cor @@ -9459,7 +9509,7 @@ recursive subroutine transfer_to_symmetric_representatoin(irow,jrow,ijterm,hcont end subroutine transfer_to_symmetric_representatoin - subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roots,bterm,k_row) + subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roots,bterm,k_row) integer(ik),intent(in) :: jrot,gamma,dimen_s real(rk),intent(inout) :: zpe @@ -9784,6 +9834,7 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_roo ! ! diagonalization schemes ! + write(*,*) "DIAGONALIZER", trim(job%diagonalizer) select case (trim(job%diagonalizer)) ! case default @@ -15150,7 +15201,7 @@ subroutine PTcontracted_matelem_class(jrot) integer :: startdim, enddim, blocksize_, ierr, b, req_count, offset type(MPI_Request),allocatable :: reqs(:) type(MPI_Status) :: reqstat - type(MPI_File) :: chkptMPIIO + type(MPI_File) :: chkptMPIIO, chkptMPIIO_ integer(kind=MPI_Offset_kind) :: mpioffset integer :: mpisz ! @@ -15243,7 +15294,7 @@ subroutine PTcontracted_matelem_class(jrot) mdimen_p = int(1+real(mdimen/comm_size)) mdimen_b = comm_size*mdimen_p - write(*,*) "DIMS", mpi_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p + !DEBUG!write(*,*) "DIMS", mpi_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p blocksize = blocksize_ ! ! The vibrational (J=0) matrix elements of the rotational and coriolis @@ -15271,31 +15322,34 @@ subroutine PTcontracted_matelem_class(jrot) ! ! Prepare the checkpoint file ! - !POSIXIO!job_is ='Vib. matrix elements of the rot. kinetic part' - !POSIXIO!call IOStart(trim(job_is),chkptIO) + job_is ='Vib. matrix elements of the rot. kinetic part' + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_open(mpi_comm_world, job%kinetmat_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) + mpioffset=0 + call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) + if (mpi_rank.eq.0) then !AT + call TimerStart('mpiiosingle') !AT - !POSIXIO!open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kinetmat_file) - !POSIXIO!write(chkptIO) 'Start Kinetic part' - !POSIXIO!! - !POSIXIO!! store the bookkeeping information about the contr. basis set - !POSIXIO!! - !POSIXIO!call PTstore_icontr_cnu(PT%Maxcontracts,chkptIO,job%IOkinet_action) - ! - call MPI_File_open(mpi_comm_world, 'mpiiofile', mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) - call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) - mpioffset=0 - call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) - if (mpi_rank.eq.0) then !AT - call TimerStart('mpiiosingle') !AT + call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) + ! + ! store the bookkeeping information about the contr. basis set + ! + call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) - call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) - call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) + call TimerStop('mpiiosingle') !AT + endif + else + call IOStart(trim(job_is),chkptIO) + ! + open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kinetmat_file) + write(chkptIO) 'Start Kinetic part' ! ! store the bookkeeping information about the contr. basis set ! - call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) - - call TimerStop('mpiiosingle') !AT + call PTstore_icontr_cnu(PT%Maxcontracts,chkptIO,job%IOkinet_action) + ! endif endif ! @@ -15474,10 +15528,13 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! - !POSIXIO!write(chkptIO) 'g_rot' - if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + endif + else + write(chkptIO) 'g_rot' endif ! endif @@ -15492,7 +15549,6 @@ subroutine PTcontracted_matelem_class(jrot) ! ! create column datatype for MPI-IO ! TODO clean up - call co_create_type(mdimen) do k1 = 1,3 do k2 = 1,3 ! @@ -15530,20 +15586,27 @@ subroutine PTcontracted_matelem_class(jrot) ! call co_distr_data(grot_t, recvbuf, mdimen_p, startdim, enddim) ! - !POSIXIO!if (trim(job%IOkinet_action)=='SAVE') then - !POSIXIO! if (job%IOmatelem_split) then - !POSIXIO! ! - !POSIXIO! call write_divided_slice(islice,'g_rot',job%matelem_suffix,mdimen,grot_t) - !POSIXIO! ! - !POSIXIO! else - !POSIXIO! ! - !POSIXIO! ! store the matrix elements - !POSIXIO! ! - !POSIXIO! write(chkptIO) grot_t - !POSIXIO! ! - !POSIXIO! endif - !POSIXIO!endif - call co_write_matrix_distr(grot_t,mdimen, startdim, enddim,chkptMPIIO) + if (trim(job%IOkinet_action)=='SAVE') then + if (job%IOmatelem_split) then + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(islice,'g_rot',job%matelem_suffix,mdimen,grot_t) + else + call write_divided_slice(islice,'g_rot',job%matelem_suffix,mdimen,grot_t) + endif + ! + else + ! + ! store the matrix elements + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call co_write_matrix_distr(grot_t,mdimen, startdim, enddim,chkptMPIIO) + else + write(chkptIO) grot_t + endif + ! + endif + endif ! enddo enddo @@ -15552,10 +15615,13 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! - !POSIXIO!write(chkptIO) 'g_cor' - if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + endif + else + write(chkptIO) 'g_cor' endif ! endif @@ -15616,7 +15682,11 @@ subroutine PTcontracted_matelem_class(jrot) ! if (job%IOmatelem_divide) then ! - call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,grot_t) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(islice,'g_cor',job%matelem_suffix,mdimen,grot_t) + else + call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,grot_t) + endif ! else ! @@ -15630,21 +15700,28 @@ subroutine PTcontracted_matelem_class(jrot) ! enddo ! - !POSIXIO!if (trim(job%IOkinet_action)=='SAVE') then - !POSIXIO! ! - !POSIXIO! if (job%IOmatelem_split) then - !POSIXIO! ! - !POSIXIO! call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,gcor_t) - !POSIXIO! ! - !POSIXIO! else - !POSIXIO! ! - !POSIXIO! ! store the matrix elements - !POSIXIO! ! - !POSIXIO! write(chkptIO) gcor_t - !POSIXIO! ! - !POSIXIO! endif - !POSIXIO!endif - call co_write_matrix_distr(gcor_t,mdimen, startdim, enddim,chkptMPIIO) + if (trim(job%IOkinet_action)=='SAVE') then + ! + if (job%IOmatelem_split) then + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(islice,'g_cor',job%matelem_suffix,mdimen,gcor_t) + else + call write_divided_slice(islice,'g_cor',job%matelem_suffix,mdimen,gcor_t) + endif + ! + else + ! + ! store the matrix elements + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call co_write_matrix_distr(gcor_t,mdimen, startdim, enddim,chkptMPIIO) + else + write(chkptIO) gcor_t + endif + ! + endif + endif ! enddo ! @@ -15808,7 +15885,11 @@ subroutine PTcontracted_matelem_class(jrot) enddo !$omp end parallel do ! - call write_divided_slice(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + else + call write_divided_slice(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + endif ! else ! @@ -15881,7 +15962,11 @@ subroutine PTcontracted_matelem_class(jrot) ! islice = (PT%Nmodes+3)*3+PT%Nmodes**2+1 ! - call write_divided_slice(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + else + call write_divided_slice(islice,'g_vib',job%matelem_suffix,mdimen,gvib_t) + endif ! if (job%IOmatelem_split.and.job%iswap(1)==1) job%iswap(1)=0 ! @@ -15899,25 +15984,53 @@ subroutine PTcontracted_matelem_class(jrot) ! f_t = -0.5_rk ! - do islice = iterm1,iterm2 - ! - if (islice==(PT%Nmodes+3)*3+PT%Nmodes**2+1) f_t = 1.0_rk - ! - call divided_slice_open(islice,chkptIO_,'g_vib',job%matelem_suffix) - ! - read(chkptIO_) gvib_t - ! - !$omp parallel do private(icoeff,jcoeff) shared(hvib_t) schedule(dynamic) - do icoeff=1,mdimen - do jcoeff=1,icoeff - hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)+f_t*gvib_t(jcoeff,icoeff) + if (trim(job%kinetmat_format).eq.'MPIIO') then + do islice = iterm1,iterm2 + ! + if (islice==(PT%Nmodes+3)*3+PT%Nmodes**2+1) f_t = 1.0_rk + ! + !! TODO !! + write(*,*) "TODO: NEEDS VERIFICATION" + call divided_slice_open_mpi(islice,chkptMPIIO_,'g_vib',job%matelem_suffix) + ! + call co_read_matrix_distr(gvib_t, mdimen, startdim, enddim, chkptMPIIO_) + ! + do b=1,comm_size + if (send_or_recv(b).ge.0) then + !$omp parallel do private(icoeff,jcoeff) shared(b,grot_t) schedule(static) + do icoeff=startdim,enddim + do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p + if (jcoeff .gt. PT%Maxcontracts) cycle + hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)+f_t*gvib_t(jcoeff,icoeff) + enddo + enddo + endif enddo + ! + call divided_slice_close_mpi(islice,chkptMPIIO_,'g_vib') + ! enddo - !$omp end parallel do - ! - call divided_slice_close(islice,chkptIO_,'g_vib') - ! - enddo + else + do islice = iterm1,iterm2 + ! + if (islice==(PT%Nmodes+3)*3+PT%Nmodes**2+1) f_t = 1.0_rk + ! + call divided_slice_open(islice,chkptIO_,'g_vib',job%matelem_suffix) + ! + read(chkptIO_) gvib_t + ! + !$omp parallel do private(icoeff,jcoeff) shared(hvib_t) schedule(dynamic) + do icoeff=1,mdimen + do jcoeff=1,icoeff + hvib_t(jcoeff,icoeff) = hvib_t(jcoeff,icoeff)+f_t*gvib_t(jcoeff,icoeff) + enddo + enddo + !$omp end parallel do + ! + call divided_slice_close(islice,chkptIO_,'g_vib') + ! + enddo + endif ! gvib_t = 0 ! @@ -15960,14 +16073,17 @@ subroutine PTcontracted_matelem_class(jrot) if ((trim(job%IOkinet_action)=='SAVE'.or.trim(job%IOkinet_action)=='VIB_SAVE').and. & (.not.job%IOmatelem_divide.or.job%iswap(1)==0) .and. & (.not.job%IOmatelem_split.or.job%iswap(1)==0)) then - ! - !POSIXIO!write(chkptIO) 'hvib' - !POSIXIO!write(chkptIO) hvib%me + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) endif - call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) + call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) + else + write(chkptIO) 'hvib' + write(chkptIO) hvib%me + endif ! endif ! @@ -15979,13 +16095,16 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_divide.or.job%iswap(1)==0 ).and. & (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) ) then ! - !POSIXIO!write(chkptIO) 'End Kinetic part' - !POSIXIO!close(chkptIO,status='keep') + if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) endif - call MPI_File_close(chkptMPIIO, ierr) + call MPI_File_close(chkptMPIIO, ierr) + else + write(chkptIO) 'End Kinetic part' + close(chkptIO,status='keep') + endif ! endif ! @@ -16011,29 +16130,34 @@ subroutine PTcontracted_matelem_class(jrot) ! Prepare the checkpoint file ! job_is ='external field contracted matrix elements for J=0' - !POSIXIO!call IOStart(trim(job_is),chkptIO) - !POSIXIO!! - !POSIXIO!open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%extFmat_file) - !POSIXIO!write(chkptIO) 'Start external field' - !POSIXIO!! - !POSIXIO!! store the matrix elements - !POSIXIO!! - !POSIXIO!write(chkptIO) PT%Maxcontracts - call MPI_File_open(mpi_comm_world, 'mpiioEXTfile', mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) - mpioffset=0 - call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) - if (mpi_rank.eq.0) then !AT - call TimerStart('mpiiosingle') !AT + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_open(mpi_comm_world, job%extFmat_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + mpioffset=0 + call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) + if (mpi_rank.eq.0) then !AT + call TimerStart('mpiiosingle') !AT - call MPI_File_write(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write(chkptMPIIO, PT%Maxcontracts, 1, mpi_integer, mpi_status_ignore, ierr) + ! + ! store the bookkeeping information about the contr. basis set + ! + !call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) + + call TimerStop('mpiiosingle') !AT + ! + endif + else + call IOStart(trim(job_is),chkptIO) ! - ! store the bookkeeping information about the contr. basis set + open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%extFmat_file) + write(chkptIO) 'Start external field' ! - call PTstoreMPI_icontr_cnu(PT%Maxcontracts,chkptMPIIO,job%IOkinet_action) - - call TimerStop('mpiiosingle') !AT + ! store the matrix elements ! - endif + write(chkptIO) PT%Maxcontracts + endif ! endif ! @@ -16077,21 +16201,33 @@ subroutine PTcontracted_matelem_class(jrot) ! call co_distr_data(extF_t, recvbuf, mdimen_p, startdim, enddim) ! - !POSIXIO!if (job%IOextF_divide) then - !POSIXIO! ! - !POSIXIO! call write_divided_slice(imu,'extF',job%extmat_suffix,mdimen,extF_t) - !POSIXIO! ! - !POSIXIO!else - !POSIXIO! ! - !POSIXIO! ! always store the matrix elements of the extF moment - !POSIXIO! ! - !POSIXIO! write(chkptIO) imu - !POSIXIO! ! - !POSIXIO! write(chkptIO) extF_t - !POSIXIO! ! - !POSIXIO!endif - if(mpi_rank.eq.0) call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) - call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) + if (job%IOextF_divide) then + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call write_divided_slice_mpi(imu,'extF',job%extmat_suffix,mdimen,extF_t) + else + call write_divided_slice(imu,'extF',job%extmat_suffix,mdimen,extF_t) + endif + ! + else + ! + ! always store the matrix elements of the extF moment + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) + endif + call mpi_barrier(mpi_comm_world,ierr) + ! + call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) + else + write(chkptIO) imu + ! + write(chkptIO) extF_t + endif + ! + endif ! if (job%verbose>=4) write(out,"('...done')",advance='YES') ! @@ -16132,11 +16268,18 @@ subroutine PTcontracted_matelem_class(jrot) ! endif ! - !POSIXIO!if (.not.job%IOextF_divide) write(chkptIO) 'End external field' - if (mpi_rank.eq.0) then !AT - if(.not.job%IOextF_divide) call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) + call mpi_barrier(mpi_comm_world,ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if (mpi_rank.eq.0) then !AT + if(.not.job%IOextF_divide) then + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) + endif + endif + call MPI_File_close(chkptMPIIO, ierr) + else + if (.not.job%IOextF_divide) write(chkptIO) 'End external field' endif - call MPI_File_close(chkptMPIIO, ierr) ! endif ! @@ -16257,6 +16400,40 @@ subroutine write_divided_slice(islice,name,suffix,N,field) ! end subroutine write_divided_slice + subroutine write_divided_slice_mpi(islice,name,suffix,N,field) + ! + integer(ik),intent(in) :: islice + character(len=*),intent(in) :: name,suffix + integer(ik),intent(in) :: N + real(rk),intent(in) :: field(N,N) + character(len=4) :: jchar + character(len=cl) :: filename + character(len=cl) :: job_is + type(MPI_File) :: chkptMPIIO + integer(kind=MPI_OFFSET_KIND) :: offset + integer :: ierr + ! + write(job_is,"('single swap_matrix')") + ! + !!call IOStart(trim(job_is),chkptIO) + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' + ! + call MPI_File_open(mpi_comm_world, filename, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + ! + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,trim(name),len(trim(name)),mpi_character,mpi_status_ignore,ierr) + ! + call co_write_matrix_distr(field,mdimen, startdim, enddim,chkptMPIIO) + ! + call MPI_File_seek(chkptMPIIO, offset, MPI_SEEK_END) + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,trim(name),len(trim(name)),mpi_character,mpi_status_ignore,ierr) + ! + call MPI_File_close(chkptMPIIO, ierr) + ! + end subroutine write_divided_slice_mpi + subroutine divided_slice_open(islice,chkptIO,name,suffix) ! @@ -16295,6 +16472,42 @@ subroutine divided_slice_open(islice,chkptIO,name,suffix) stop 'divided_slice_open-error: The split-file does not exist' ! end subroutine divided_slice_open + + subroutine divided_slice_open_mpi(islice,chkptIO,name,suffix) + ! + implicit none + integer(ik),intent(in) :: islice + type(MPI_File),intent(inout) :: chkptIO + character(len=*),intent(in) :: name,suffix + character(len=4) :: jchar + character(len=cl) :: buf,filename,job_is + integer(ik) :: ilen + integer :: ierr + ! + if (.not.job%IOmatelem_split) return + ! + write(job_is,"('single swap_matrix')") + ! + !!call IOStart(trim(job_is),chkptIO) + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' + ! + call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, chkptMPIIO, ierr) + if (ierr) then + if(mpi_rank.eq.0) write(out,"('divided_slice_open-error: The split-file ',a,' does not exist')") trim(filename) + stop 'divided_slice_open-error: The split-file does not exist' + endif + ! + ilen = LEN_TRIM(name) + ! + call MPI_File_read_all(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) + if ( trim(buf(1:ilen))/=trim(name) ) then + if(mpi_rank.eq.0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,buf(1:ilen) + stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' + end if + end subroutine divided_slice_open_mpi ! subroutine divided_slice_close(islice,chkptIO,name) ! @@ -16320,6 +16533,28 @@ subroutine divided_slice_close(islice,chkptIO,name) ! end subroutine divided_slice_close + subroutine divided_slice_close_mpi(islice,chkptIO,name) + ! + integer(ik),intent(in) :: islice + type(MPI_File),intent(inout) :: chkptIO + character(len=*),intent(in) :: name + character(len=cl) :: buf + integer(ik) :: ilen + integer :: ierr + ! + if (.not.job%IOmatelem_split) return + ! + ilen = LEN_TRIM(name) + ! + call MPI_File_read_all(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) + if ( trim(buf(1:ilen))/=trim(name) ) then + if(mpi_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice ',a,': footer is missing or wrong',a)") trim(name),buf(1:ilen) + stop 'divided_slice_close - in slice - footer missing or wrong' + end if + ! + call MPI_File_close(chkptIO, ierr) + ! + end subroutine divided_slice_close_mpi ! ! This procedure is thought to make the calculations of the contracted mat. elements @@ -16344,6 +16579,14 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) integer(ik) :: iroot,jroot,Maxcontracts,Nclasses integer(hik):: ib,ib0 real(rk) :: f_t,f_prod(PT%Nclasses) + !integer(ik) :: dps,dpe + integer :: mloc_a, mloc_b, mloc_c, mloc_d + integer :: nloc_a, nloc_b, nloc_c, nloc_d + integer :: mb_a, mb_b, mb_c, mb_d + integer :: nb_a, nb_b, nb_c, nb_d + integer :: NUMROC + real(rk),allocatable,dimension(:,:) :: As, Bs, Cs, Ds,mat_tts + integer :: i,j,i_loc,j_loc,proc_row,proc_col ! Nclasses = PT%Nclasses Maxcontracts = PT%Maxcontracts @@ -16359,76 +16602,89 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) ! im2 = min(PT%Nmodes-1,im2) ! - if (job%verbose>=4) call TimerStart('contract_matrix') - ! - me_t = 0 - ! - if (iclasses/=PT%Nclasses) then + if(mpi_rank .eq. mod(iclasses,comm_size)) then + !dps = mpi_rank * (dimen_p/comm_size)+1 + !dpe = (mpi_rank+1) * (dimen_p/comm_size) + !if (dpe.gt.dimen_p) dpe = dimen_p + if (job%verbose>=4) call TimerStart('contract_matrix') ! - !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) - do jprim=1,contr(iclasses)%dimen - ! - nu_j(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,jprim) + me_t = 0 + ! + if (iclasses/=PT%Nclasses) then ! - do iprim=1,contr(iclasses)%dimen - ! - nu_i(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,iprim) - ! - ! Primitive matrix elements of all Hamiltonian components .... + !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) + do jprim=1,contr(iclasses)%dimen + !do jprim=dps,dpe ! - me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) + nu_j(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,jprim) ! + do iprim=1,contr(iclasses)%dimen + !do iprim=dps,dpe + ! + nu_i(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,iprim) + ! + ! Primitive matrix elements of all Hamiltonian components .... + ! + me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) + ! + enddo enddo - enddo - !$omp end parallel do - ! - else - ! - !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) - do jprim=1,contr(iclasses)%dimen + !$omp end parallel do ! - nu_j(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,jprim) + else ! - do iprim=1,contr(iclasses)%dimen + !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) + do jprim=1,contr(iclasses)%dimen + !do jprim=dps,dpe ! - nu_i(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,iprim) - ! - ! Primitive matrix elements of all Hamiltonian components .... - ! - me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) - ! - me_t(iprim,jprim) = me_t(iprim,jprim)*fl%coeff(iterm,nu_i(PT%Nmodes),nu_j(PT%Nmodes)) - ! - !me_t(iprim,jprim) = Hobject(nu_i(PT%Nmodes),nu_j(PT%Nmodes)) + nu_j(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,jprim) ! + do iprim=1,contr(iclasses)%dimen + !do iprim=dps,dpe + ! + nu_i(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,iprim) + ! + ! Primitive matrix elements of all Hamiltonian components .... + ! + me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) + ! + me_t(iprim,jprim) = me_t(iprim,jprim)*fl%coeff(iterm,nu_i(PT%Nmodes),nu_j(PT%Nmodes)) + ! + !me_t(iprim,jprim) = Hobject(nu_i(PT%Nmodes),nu_j(PT%Nmodes)) + ! + enddo enddo - enddo - !$omp end parallel do + !$omp end parallel do + ! + endif ! - endif - ! - if (job%verbose>=4) call TimerStop('contract_matrix') - ! - if (job%verbose>=4) call TimerStart('contract_matrix_dgemm') - ! - !mat_t(1:nroots,1:dimen_p) = matmul(transpose(tmat(iclasses)%coeffs(1:dimen_p,1:nroots)),me_t(1:dimen_p,1:dimen_p)) - !mat_tt(iclasses)%coeffs(1:nroots,1:nroots) = matmul(mat_t(1:nroots,1:dimen_p),tmat(iclasses)%coeffs(1:dimen_p,1:nroots)) - ! - call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs,dimen_p,& - me_t,dimen_p_max,beta,mat_t,nroots_max) - call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t,nroots_max,& - tmat(iclasses)%coeffs,dimen_p,beta,mat_tt(iclasses)%coeffs,nroots) - ! - matclass(iclasses,1:nroots,1:nroots) = mat_tt(iclasses)%coeffs - ! - !call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,& - ! me_t(1:dimen_p,1:dimen_p),dimen_p,beta,mat_t(1:nroots,1:dimen_p),nroots) - !call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t(1:nroots,1:dimen_p),nroots,& - ! tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,beta,mat_tt(iclasses)%coeffs(1:nroots,1:nroots),nroots) - ! - if (job%verbose>=4) call TimerStop('contract_matrix_dgemm') + if (job%verbose>=4) call TimerStop('contract_matrix') + ! + if (job%verbose>=4) call TimerStart('contract_matrix_dgemm') + ! + !mat_t(1:nroots,1:dimen_p) = matmul(transpose(tmat(iclasses)%coeffs(1:dimen_p,1:nroots)),me_t(1:dimen_p,1:dimen_p)) + !mat_tt(iclasses)%coeffs(1:nroots,1:nroots) = matmul(mat_t(1:nroots,1:dimen_p),tmat(iclasses)%coeffs(1:dimen_p,1:nroots)) + ! + + call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs,dimen_p,& + me_t,dimen_p_max,beta,mat_t,nroots_max) + call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t,nroots_max,& + tmat(iclasses)%coeffs,dimen_p,beta,mat_tt(iclasses)%coeffs,nroots) + + matclass(iclasses,1:nroots,1:nroots) = mat_tt(iclasses)%coeffs + ! + !call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,& + ! me_t(1:dimen_p,1:dimen_p),dimen_p,beta,mat_t(1:nroots,1:dimen_p),nroots) + !call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t(1:nroots,1:dimen_p),nroots,& + ! tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,beta,mat_tt(iclasses)%coeffs(1:nroots,1:nroots),nroots) + ! + if (job%verbose>=4) call TimerStop('contract_matrix_dgemm') + else + matclass(iclasses,1:nroots,1:nroots) = 0.0 + endif ! enddo + call mpi_allreduce(mpi_in_place, matclass, PT%Nclasses*nroots_max*nroots_max, mpi_double_precision, mpi_sum, mpi_comm_world) ! if (job%verbose>=4) call TimerStart('contract_matrix_sum_field') ! @@ -33662,7 +33918,7 @@ subroutine PTstore_icontr_cnu(Maxcontracts,iunit,dir) ! end subroutine PTstore_icontr_cnu - subroutine ptstorempi_icontr_cnu(maxcontracts,iunit,dir) + subroutine PTstorempi_icontr_cnu(maxcontracts,iunit,dir) use mpi_aux integer(ik),intent(in) :: maxcontracts @@ -33692,7 +33948,7 @@ subroutine ptstorempi_icontr_cnu(maxcontracts,iunit,dir) end select - end subroutine ptstorempi_icontr_cnu + end subroutine PTstorempi_icontr_cnu subroutine PTdefine_contr_from_eigenvect(nroots,Neigenlevels,eigen) diff --git a/tran.f90 b/tran.f90 index 27bcff8..6534ef5 100644 --- a/tran.f90 +++ b/tran.f90 @@ -14,7 +14,7 @@ module tran use moltype, only : intensity,extF use symmetry, only : sym - use perturbation, only : PTintcoeffsT,PTrotquantaT,PTNclasses,PTstore_icontr_cnu,PTeigenT,PTdefine_contr_from_eigenvect,PTrepresT + use perturbation, only : PTintcoeffsT,PTrotquantaT,PTNclasses,PTstore_icontr_cnu,PTeigenT,PTdefine_contr_from_eigenvect,PTrepresT,PTstorempi_icontr_cnu private public read_contrind,read_eigenval, TReigenvec_unit, bset_contrT, & @@ -1193,6 +1193,7 @@ end subroutine TRconvert_repres_J0_to_contr ! subroutine TRconvert_matel_j0_eigen(jrot) + use mpi_aux implicit none integer(ik),intent(in) :: Jrot integer(ik) :: info,imu @@ -1213,6 +1214,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) integer(ik),allocatable :: ijterm(:,:) double precision,parameter :: alpha = 1.0d0,beta=0.0d0 character(len=cl) :: jchar,filename + type(MPI_File) :: fileh, fileh_w + integer(kind=MPI_OFFSET_KIND) :: mpioffset + integer :: ierr ! if (job%verbose>=2) write(out,"(/'Compute J=0 vib. matrix elements of the kinetic energy operator...')") ! @@ -1234,6 +1238,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ' at least one must be set to CONVERT or EIGENfunc SAVE CONVERT' stop 'TRconvert_matel_j0_eigen: illegal MATELEM or EXTMATELEM <> CONVERT' end if + call co_init_comms()!TODO get rid of this one ! matsize = int(Neigenroots*(Neigenroots+1)/2,hik) matsize2 = int(Neigenroots*Neigenroots,hik) @@ -1398,15 +1403,23 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='Eigen-vib. matrix elements of the rot. kinetic part' call IOStart(trim(job_is),chkptIO) ! - open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) - write(chkptIO) 'Start Kinetic part' + !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) + !write(chkptIO) 'Start Kinetic part' + call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) + mpioffset = 0 + call MPI_File_set_size(fileh_w, mpioffset, ierr) + call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) ! treat_vibration = .false. ! - call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) + !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) + call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) ! if (job%vib_rot_contr) then - write(chkptIO) 'vib-rot' + !write(chkptIO) 'vib-rot' + call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) endif ! endif @@ -1435,6 +1448,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! + call MPI_File_open(mpi_comm_world, job%kinetmat_file, mpi_mode_rdonly, mpi_info_null, fileh, ierr) ! The eigen-vibrational (J=0) matrix elements of the rotational and coriolis ! kinetic parts are being computed here. ! @@ -1444,15 +1458,18 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! task = 'rot' ! - write(chkptIO) 'g_rot' + !write(chkptIO) 'g_rot' + call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) ! - call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! else ! task = 'top' ! - call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! endif ! @@ -1480,7 +1497,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - read(iunit) gmat + ! read(iunit) gmat + call MPI_File_read_all(fileh, gmat, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) + !write (chkptIO) gmat ! endif ! @@ -1500,13 +1519,15 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - write (chkptIO) mat_s + !write (chkptIO) mat_s + call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! enddo ! enddo + ! if (job%verbose>=5) call TimerStop('J0-convertion for g_rot') ! @@ -1517,9 +1538,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! task = 'cor' ! - call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! - write(chkptIO) 'g_cor' + !write(chkptIO) 'g_cor' + call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) ! endif ! @@ -1549,7 +1571,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - read(iunit) gmat + ! read(iunit) gmat + call MPI_File_read_all(fileh, gmat, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1569,7 +1592,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - write (chkptIO) mat_s + !write (chkptIO) mat_s + call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1579,15 +1603,17 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! - if (.not.job%IOmatelem_split.or.job%iswap(1)==1) write(chkptIO) 'End Kinetic part' + !if (.not.job%IOmatelem_split.or.job%iswap(1)==1) write(chkptIO) 'End Kinetic part' + if (.not.job%IOmatelem_split.or.job%iswap(1)==1) call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) ! if (.not.job%vib_rot_contr) then - close(chkptIO,status='keep') + !close(chkptIO,status='keep') + call MPI_File_close(fileh_w, ierr) endif ! task = 'end' ! - call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! if (allocated(gmat)) deallocate(gmat) call ArrayStop('gmat-fields') @@ -1595,9 +1621,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (job%verbose>=3) write(out,"(' ...done!')") ! endif + call MPI_File_close(fileh, ierr) ! ! External field part ! + !!!!! MPIIO TODO !!!!! if (FLextF_matelem) then ! if (job%verbose>=3) write(out,"(/' Transform extF to J0-representation...')") @@ -1623,15 +1651,25 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! filename = job%extFmat_file ! - open(iunit,form='unformatted',action='read',position='rewind',status='old',file=filename) + !open(iunit,form='unformatted',action='read',position='rewind',status='old',file=filename) + ! + call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, fileh, ierr) + ! + call MPI_File_read_all(fileh, buf20, 7, mpi_character, mpi_status_ignore, ierr) + if (buf20(1:7)/='[MPIIO]') then + write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") filename, buf20 + stop 'restore_vib_matric_elemets - Not an MPIIO file' + end if ! - read(iunit) buf20 + !read(iunit) buf20 + call MPI_File_read_all(fileh, buf20, 20, mpi_character, mpi_status_ignore, ierr) if (buf20/='Start external field') then write (out,"(' restore_vib_matrix_elements ',a,' has bogus header: ',a)") filename,buf20 stop 'restore_vib_matrix_elements - bogus file format' end if ! - read(iunit) ncontr_t + !read(iunit) ncontr_t + call MPI_File_read_all(fileh, ncontr_t, 1, mpi_integer, mpi_status_ignore, ierr) ! if (bset_contr(1)%Maxcontracts/=ncontr_t) then write (out,"(' Dipole moment checkpoint file ',a)") filename @@ -1648,12 +1686,19 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='external field contracted matrix elements for J=0' call IOStart(trim(job_is),chkptIO) ! - open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) - write(chkptIO) 'Start external field' + !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) + !write(chkptIO) 'Start external field' + call MPI_File_open(mpi_comm_world, job%exteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) + mpioffset = 0 + call MPI_File_set_size(fileh_w, mpioffset, ierr) + call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call MPI_File_write(fileh_w, 'Start external field', 20, mpi_character, mpi_status_ignore, ierr) ! ! store the matrix elements ! - write(chkptIO) Neigenroots + !write(chkptIO) Neigenroots + call MPI_File_write(fileh_w, Neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) ! endif ! @@ -1681,9 +1726,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - read(iunit) imu_t + !read(iunit) imu_t + call MPI_File_read_all(fileh, imu_t, 1, mpi_integer, mpi_status_ignore, ierr) ! - read(iunit) extF_me + !read(iunit) extF_me + call MPI_File_read_all(fileh, extF_me, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1720,8 +1767,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! - write(chkptIO) imu - write(chkptIO) mat_s + !write(chkptIO) imu + !write(chkptIO) mat_s + call MPI_File_write_all(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) + call MPI_File_write_all(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! else ! @@ -1736,13 +1785,15 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide) then ! - read(iunit) buf20(1:18) + !read(iunit) buf20(1:18)double_precision + call MPI_File_read_all(fileh, buf20, 18, mpi_character, mpi_status_ignore, ierr) if (buf20(1:18)/='End external field') then - write (out,"(' restore_Extvib_matrix_elements ',a,' has bogus footer: ',a)") job%kinetmat_file,buf20(1:17) + write (out,"(' restore_Extvib_matrix_elements ',a,' has bogus footer: ',a)") filename,buf20(1:18) stop 'restore_Extvib_matrix_elements - bogus file format' end if ! - close(iunit,status='keep') + !close(iunit,status='keep') + call MPI_File_close(fileh, ierr) ! !job_is ='external field contracted matrix elements for J=0' !call IOStart(trim(job_is),iunit) @@ -1751,8 +1802,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! - write(chkptIO) 'End external field' - close(chkptIO,status='keep') + !write(chkptIO) 'End external field' + call MPI_File_write_all(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) + !close(chkptIO,status='keep') + call MPI_File_close(fileh_w, ierr) ! endif ! @@ -2222,6 +2275,273 @@ end subroutine find_groundstate_icontr end subroutine restore_rot_kinetic_matrix_elements ! ! + ! [MPIIO] Here we restore the vibrational (J=0) matrix elements of the rotational kinetic part G_rot and G_cor + ! + subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_part,fileh) + use mpi_aux + ! + implicit none + ! + integer(ik),intent(in) :: jrot + logical,intent(in) :: treat_vibration + character(len=3),intent(in) :: kinetic_part + !integer(ik),intent(inout) :: chkptIO + type(MPI_File),intent(in) :: fileh + ! + integer(ik) :: iclasses,alloc,ilevel,ib,max_deg_size,nclasses,islice + character(len=cl) :: job_is + character(len=18) :: buf18 + integer(ik) :: ncontr_t,rootsize + integer(ik),allocatable :: imat_t(:,:) + real(rk),allocatable :: mat_t(:) + integer :: ierr + ! + nclasses = bset_contr(1)%nclasses + ! + !call co_init_comms() + select case (kinetic_part) + ! + case('rot','top') + ! + job_is ='Vib. matrix elements of the rot. kinetic' + !call IOStart(trim(job_is),chkptIO) + ! + !open(chkptIO,form='unformatted',action='read',position='rewind',status='old',file=job%kinetmat_file) + !call MPI_File_Open(mpi_comm_world, 'mpiiofile', mpi_mode_rdonly, mpi_info_null, fileh, ierr) + ! + !read(chkptIO) buf18 + call MPI_File_read_all(fileh, buf18, 7, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:7)/='[MPIIO]') then + write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") 'mpiiofile',buf18 + stop 'PTcontracted_matelem_class - Not an MPIIO file' + end if + call MPI_File_read_all(fileh, buf18, 18, mpi_character, mpi_status_ignore, ierr) + if (buf18/='Start Kinetic part') then + write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus header: ',a)") 'mpiiofile',buf18 + stop 'PTcontracted_matelem_class - bogus file format' + end if + ! + !read(chkptIO) ncontr_t + call MPI_File_read_all(fileh, ncontr_t, 1, mpi_integer, mpi_status_ignore, ierr) + ! + if (bset_contr(1)%Maxcontracts/=ncontr_t) then + write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file + write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i0)") bset_contr(1)%Maxcontracts,ncontr_t + stop 'PTcontracted_matelem_class - in file - illegal nroots ' + end if + ! + rootsize = bset_contr(1)%Maxcontracts*(bset_contr(1)%Maxcontracts+1)/2 + ! + if (job%verbose>=6) write(out,"(/'Restore_rot_kin...: Number of elements: ',i8)") bset_contr(1)%Maxcontracts + ! + ! Read the indexes of the J=0 contracted basis set. + ! + allocate (imat_t(0:nclasses,ncontr_t),stat=alloc) + call ArrayStart('contractive_space',alloc,size(imat_t),kind(imat_t)) + ! + !read(chkptIO) buf18(1:10) + call MPI_File_read_all(fileh, buf18, 10, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:10)/='icontr_cnu') then + write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,buf18(1:10) + stop 'restore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + end if + ! + !read(chkptIO) imat_t(0:nclasses,1:ncontr_t) + call MPI_File_read_all(fileh, imat_t, (nclasses+1)*ncontr_t, mpi_integer, mpi_status_ignore, ierr) + ! + if (job%vib_rot_contr) then + ! + allocate (TRicontr_cnu(0:nclasses,ncontr_t),stat=alloc) + call ArrayStart('TRicontr_cnu',alloc,size(TRicontr_cnu),kind(TRicontr_cnu)) + TRicontr_cnu = imat_t + ! + endif + ! + !read(chkptIO) buf18(1:11) + call MPI_File_read_all(fileh, buf18, 11, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:11)/='icontr_ideg') then + write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,buf18(1:11) + stop 'restore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + end if + ! + !read(chkptIO) imat_t(0:nclasses,1:ncontr_t) + call MPI_File_read_all(fileh, imat_t, (nclasses+1)*ncontr_t, mpi_integer, mpi_status_ignore, ierr) + ! + deallocate(imat_t) + ! + call arraystop('contractive_space') + ! + ! Read the rotational part only if its really needed. + ! + if (trim(kinetic_part)=='rot') then + ! + !read(chkptIO) buf18(1:4) + call MPI_File_read_all(fileh, buf18, 5, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:5)/='g_rot') then + write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") 'mpiiofile',buf18(1:5) + ! + if (buf18(1:4)=='hvib'.or.buf18(1:3)=='End') & + write (out,"(a,a)") & + ' Most likely the split chk-points are supposed to be used.',& + 'Re-do MATELEM SAVE or use SPLIT in MATELEM !' + stop 'restore_rot_kinetic_matrix_elements - in file - g_rot missing' + end if + ! + endif + ! + if (job%vib_rot_contr) then + ! + !inquire(iolength=rec_len) f_t + !rec_len = rec_len*ncontr*PT%max_deg_size + !! + !do islice = 1,9+3*PT%Nmodes + ! call divided_slice_open_vib_rot(islice,rec_len,job%matelem_suffix) + !enddo + ! + call find_groundstate_icontr(bset_contr(1)%nclasses) + ! + endif + ! + case('cor') + ! + !read(chkptIO) buf18(1:5) + call MPI_File_read_all(fileh, buf18, 5, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:5)/='g_cor') then + write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,buf18(1:5) + stop 'restore_rot_kinetic_matrix_elements - in file - g_cor missing' + end if + ! + case('end') + ! + if (job%vib_rot_contr) then + ! + do islice = 1,9+3*FLNmodes + call divided_slice_close_vib_rot(islice) + enddo + ! + endif + ! + !read(chkptIO) buf18(1:4) + call MPI_File_read_all(fileh, buf18, 4, mpi_character, mpi_status_ignore, ierr) + if (buf18(1:4)/='hvib'.and.buf18(1:3)/='End'.and.buf18(1:4)/='vib-') then + write (out,"(' Vib. kinetic checkpoint file ',a,': hvib, vib-rot, End is missing ',a)") job%kinetmat_file,buf18(1:4) + stop 'restore_rot_kinetic_matrix_elements - in file - hvib or End missing' + end if + ! + !close(chkptIO,status='keep') + !call MPI_File_close(fileh, ierr) + ! + end select + ! + contains + ! + subroutine divided_slice_open_vib_rot(islice,rec_len,suffix) + ! + implicit none + integer(ik),intent(in) :: islice,rec_len + character(len=*),intent(in) :: suffix + integer(ik) :: chkptIO + character(len=4) :: jchar + character(len=cl) :: buf,filename,job_is + integer(ik) :: ilen + logical :: ifopened + ! + if (.not.job%IOmatelem_split) return + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' + ! + write(job_is,"('single swap_matrix #',i8)") islice + ! + call IOStart(trim(job_is),chkptIO) + ! + ! use direct access format for vib-rot + ! + open(chkptIO,access='direct',action='read',status='old',recl=rec_len,file=filename) + ! + end subroutine divided_slice_open_vib_rot + ! + subroutine divided_slice_close_vib_rot(islice) + ! + implicit none + integer(ik),intent(in) :: islice + integer(ik) :: chkptIO + character(len=4) :: jchar + character(len=cl) :: filename,job_is + integer(ik) :: ilen + ! + if (.not.job%IOmatelem_split) return + ! + write(jchar, '(i4)') islice + ! + write(job_is,"('single swap_matrix #',i8)") islice + ! + call IOStart(trim(job_is),chkptIO) + ! + ! use direct access format for vib-rot + ! + close(chkptIO,status='keep') + ! + end subroutine divided_slice_close_vib_rot + ! + ! find correspondence between contracted quantum numbers: current and for J=0 + ! + subroutine find_groundstate_icontr(Nclasses) + ! + integer(ik), intent(in) :: Nclasses + integer(ik) :: maxcontr + ! + integer(ik) :: icontr,iterm,cnu_(1:bset_contr(1)%nclasses),icontr0,alloc,maxcontr0 + ! + if (job%verbose>=4) write(out,"(' Find correlation between J=0 and J/=0 contr. basis functions...')") + ! + maxcontr = size(TRicontr_cnu,dim=2) + ! + ! count icontr0-s + icontr0 = 0 + cnu_ = 0 + ! + do icontr = 1,maxcontr + ! + if (any(TRicontr_cnu(1:Nclasses,icontr)/=cnu_(1:Nclasses))) then + ! + icontr0 = icontr0 + 1 + cnu_ = TRicontr_cnu(1:Nclasses,icontr) + ! + endif + ! + enddo + ! + maxcontr0 = icontr0 + ! + allocate(Ncontr02icase0(maxcontr0,2),stat=alloc) + call ArrayStart('Ncontr02icase0',alloc,size(Ncontr02icase0),kind(Ncontr02icase0)) + ! + ! count ideg0 + icontr0 = 0 + cnu_ = 0 + ! + Ncontr02icase0(1,1) = 1 + Ncontr02icase0(maxcontr0,2) = maxcontr + ! + do icontr = 1,maxcontr + ! + if (any(TRicontr_cnu(1:Nclasses,icontr)/=cnu_(1:Nclasses))) then + ! + icontr0 = icontr0 + 1 + cnu_ = TRicontr_cnu(1:Nclasses,icontr) + Ncontr02icase0(icontr0,1) = icontr + if (icontr0>1) Ncontr02icase0(icontr0-1,2) = icontr-1 + ! + endif + ! + enddo + ! + end subroutine find_groundstate_icontr + ! + end subroutine restore_rot_kinetic_matrix_elements_mpi + ! ! subroutine eigen_vib_matelem_vector(iparity,ilevelI,irootI,nlevels,nroots,cdimenmax,icoeff,fcoeff,cdimen,field,mat) ! From 64e38dbbb0adf0b245726b60f994767af1765959 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Wed, 8 May 2019 13:13:16 +0100 Subject: [PATCH 09/79] Remove accidentally committed test code --- perturbation.f90 | 132 ++++++++++++++++++++--------------------------- 1 file changed, 56 insertions(+), 76 deletions(-) diff --git a/perturbation.f90 b/perturbation.f90 index a6f43b2..aaa97d3 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -16579,14 +16579,6 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) integer(ik) :: iroot,jroot,Maxcontracts,Nclasses integer(hik):: ib,ib0 real(rk) :: f_t,f_prod(PT%Nclasses) - !integer(ik) :: dps,dpe - integer :: mloc_a, mloc_b, mloc_c, mloc_d - integer :: nloc_a, nloc_b, nloc_c, nloc_d - integer :: mb_a, mb_b, mb_c, mb_d - integer :: nb_a, nb_b, nb_c, nb_d - integer :: NUMROC - real(rk),allocatable,dimension(:,:) :: As, Bs, Cs, Ds,mat_tts - integer :: i,j,i_loc,j_loc,proc_row,proc_col ! Nclasses = PT%Nclasses Maxcontracts = PT%Maxcontracts @@ -16602,89 +16594,77 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) ! im2 = min(PT%Nmodes-1,im2) ! - if(mpi_rank .eq. mod(iclasses,comm_size)) then - !dps = mpi_rank * (dimen_p/comm_size)+1 - !dpe = (mpi_rank+1) * (dimen_p/comm_size) - !if (dpe.gt.dimen_p) dpe = dimen_p - if (job%verbose>=4) call TimerStart('contract_matrix') - ! - me_t = 0 + if (job%verbose>=4) call TimerStart('contract_matrix') + ! + me_t = 0 + ! + if (iclasses/=PT%Nclasses) then ! - if (iclasses/=PT%Nclasses) then + !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) + do jprim=1,contr(iclasses)%dimen ! - !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) - do jprim=1,contr(iclasses)%dimen - !do jprim=dps,dpe + nu_j(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,jprim) + ! + do iprim=1,contr(iclasses)%dimen ! - nu_j(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,jprim) + nu_i(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,iprim) + ! + ! Primitive matrix elements of all Hamiltonian components .... + ! + me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) ! - do iprim=1,contr(iclasses)%dimen - !do iprim=dps,dpe - ! - nu_i(im1:im2) = contr(iclasses)%prim_bs%icoeffs(im1:im2,iprim) - ! - ! Primitive matrix elements of all Hamiltonian components .... - ! - me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) - ! - enddo enddo - !$omp end parallel do + enddo + !$omp end parallel do + ! + else + ! + !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) + do jprim=1,contr(iclasses)%dimen ! - else + nu_j(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,jprim) ! - !$omp parallel do private(iprim,jprim,nu_i,nu_j) shared(me_t) - do jprim=1,contr(iclasses)%dimen - !do jprim=dps,dpe + do iprim=1,contr(iclasses)%dimen ! - nu_j(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,jprim) + nu_i(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,iprim) + ! + ! Primitive matrix elements of all Hamiltonian components .... + ! + me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) + ! + me_t(iprim,jprim) = me_t(iprim,jprim)*fl%coeff(iterm,nu_i(PT%Nmodes),nu_j(PT%Nmodes)) + ! + !me_t(iprim,jprim) = Hobject(nu_i(PT%Nmodes),nu_j(PT%Nmodes)) ! - do iprim=1,contr(iclasses)%dimen - !do iprim=dps,dpe - ! - nu_i(im1:im2+1) = contr(iclasses)%prim_bs%icoeffs(im1:im2+1,iprim) - ! - ! Primitive matrix elements of all Hamiltonian components .... - ! - me_t(iprim,jprim) = func(iterm,im1,im2,nu_i,nu_j,k,k1,k2) - ! - me_t(iprim,jprim) = me_t(iprim,jprim)*fl%coeff(iterm,nu_i(PT%Nmodes),nu_j(PT%Nmodes)) - ! - !me_t(iprim,jprim) = Hobject(nu_i(PT%Nmodes),nu_j(PT%Nmodes)) - ! - enddo enddo - !$omp end parallel do - ! - endif - ! - if (job%verbose>=4) call TimerStop('contract_matrix') - ! - if (job%verbose>=4) call TimerStart('contract_matrix_dgemm') - ! - !mat_t(1:nroots,1:dimen_p) = matmul(transpose(tmat(iclasses)%coeffs(1:dimen_p,1:nroots)),me_t(1:dimen_p,1:dimen_p)) - !mat_tt(iclasses)%coeffs(1:nroots,1:nroots) = matmul(mat_t(1:nroots,1:dimen_p),tmat(iclasses)%coeffs(1:dimen_p,1:nroots)) + enddo + !$omp end parallel do ! + endif + ! + if (job%verbose>=4) call TimerStop('contract_matrix') + ! + if (job%verbose>=4) call TimerStart('contract_matrix_dgemm') + ! + !mat_t(1:nroots,1:dimen_p) = matmul(transpose(tmat(iclasses)%coeffs(1:dimen_p,1:nroots)),me_t(1:dimen_p,1:dimen_p)) + !mat_tt(iclasses)%coeffs(1:nroots,1:nroots) = matmul(mat_t(1:nroots,1:dimen_p),tmat(iclasses)%coeffs(1:dimen_p,1:nroots)) + ! - call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs,dimen_p,& - me_t,dimen_p_max,beta,mat_t,nroots_max) - call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t,nroots_max,& - tmat(iclasses)%coeffs,dimen_p,beta,mat_tt(iclasses)%coeffs,nroots) + call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs,dimen_p,& + me_t,dimen_p_max,beta,mat_t,nroots_max) + call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t,nroots_max,& + tmat(iclasses)%coeffs,dimen_p,beta,mat_tt(iclasses)%coeffs,nroots) - matclass(iclasses,1:nroots,1:nroots) = mat_tt(iclasses)%coeffs - ! - !call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,& - ! me_t(1:dimen_p,1:dimen_p),dimen_p,beta,mat_t(1:nroots,1:dimen_p),nroots) - !call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t(1:nroots,1:dimen_p),nroots,& - ! tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,beta,mat_tt(iclasses)%coeffs(1:nroots,1:nroots),nroots) - ! - if (job%verbose>=4) call TimerStop('contract_matrix_dgemm') - else - matclass(iclasses,1:nroots,1:nroots) = 0.0 - endif + matclass(iclasses,1:nroots,1:nroots) = mat_tt(iclasses)%coeffs + ! + !call dgemm('T','N',nroots,dimen_p,dimen_p,alpha,tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,& + ! me_t(1:dimen_p,1:dimen_p),dimen_p,beta,mat_t(1:nroots,1:dimen_p),nroots) + !call dgemm('N','N',nroots,nroots,dimen_p,alpha,mat_t(1:nroots,1:dimen_p),nroots,& + ! tmat(iclasses)%coeffs(1:dimen_p,1:nroots),dimen_p,beta,mat_tt(iclasses)%coeffs(1:nroots,1:nroots),nroots) + ! + if (job%verbose>=4) call TimerStop('contract_matrix_dgemm') ! enddo - call mpi_allreduce(mpi_in_place, matclass, PT%Nclasses*nroots_max*nroots_max, mpi_double_precision, mpi_sum, mpi_comm_world) ! if (job%verbose>=4) call TimerStart('contract_matrix_sum_field') ! From 3f68ac03b9fd1618ab7bb17c3907e3ba26d59867 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Wed, 8 May 2019 16:15:47 +0100 Subject: [PATCH 10/79] Some MPI documentation + SLURM script + makefile update --- TROVE.wiki/TROVE-mpi.md | 82 ++++++++++++++++++++++++++++++++++++ TROVE.wiki/trove-slurm.batch | 30 +++++++++++++ makefile | 4 +- 3 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 TROVE.wiki/TROVE-mpi.md create mode 100644 TROVE.wiki/trove-slurm.batch diff --git a/TROVE.wiki/TROVE-mpi.md b/TROVE.wiki/TROVE-mpi.md new file mode 100644 index 0000000..da0017f --- /dev/null +++ b/TROVE.wiki/TROVE-mpi.md @@ -0,0 +1,82 @@ +# MPI functionality + +Distributed memory parallelism using MPI has been implemented in the +PTcontracted_matalem_class subroutine, plus subroutines called by or related to +it. Most MPI functionality has been consolidated in the mpi_aux.f90 file. + +File output has been implemented using MPI-IO. Because MPI is not compatible +with the Fortran's record-based unformatted file format, a new file format has +been implemented. Preserving the old file format by using Master I/O has been +considered, but cannot be supported for large simulations (where distributed +memory parallelism is required) as the size of the data needing to be written +will exceed the available memory on a compute node. + +The new file format is similar to the non-mpi format, except for using 'raw' +binary data versus Fortran's records. Conversion utilities between the two +formats will be provided. + +The following has been implemented: + + - Writing of single-file data + - Reading of single-file data + - Writing of split file data + +The following needs testing: + + - Reading of split file data + +The following is in progress: + + - Reading the new format on MPI-free systems + - Conversion functionality + + +To enable writing in the new file format, add the parameter `format mpiio` to +the input file under `CHECK_POINT`, e.g.: + +``` +CHECK_POINT +HAMILTONIAN read +potential read +kinetic read +external read +basis_set read +CONTRACT read +matelem save +extmatelem save +eigenfunc save +format mpiio +END +``` + +NOTE: As other routines than `matelem` have not been touched yet, expect a large +amount of duplicated output from stdout. This does not affect the integrity of +file output. + +# How to run + +## Requirements + +The only additional requirement to build and run the MPI version of TROVE is a +compatible MPI library. The code has been tested with Intel MPI, versions +2017.4, 2018.4 and 2019.3. Intel 2018.1 is confirmed *not* working. + +This version of TROVE should also build and run correctly with OpenMPI, and +should be compatible with GNU GFortran but this is untested. + +Although not required, it is highly recommended to run on a distributed +filesystem (e.g. Lustre + striping) for significantly increased I/O performance. + +## Building TROVE-MPI + +Make sure the compiler is set to 'mpif90' or 'mpifort' in the Makegfile, then +run `make` in the source directory. + +## Running TROVE-MPI + +Once built, run the code like the sequential version but prepended with `mpirun` +or your scheduler's mpi wrapper (e.g. `srun`). E.g.: + +`mpirun trove.x infile.inp` + +A sample batch script for SLURM is provided. diff --git a/TROVE.wiki/trove-slurm.batch b/TROVE.wiki/trove-slurm.batch new file mode 100644 index 0000000..2a6f026 --- /dev/null +++ b/TROVE.wiki/trove-slurm.batch @@ -0,0 +1,30 @@ +#!/bin/bash +#SBATCH -J TROVE-MPI- +#SBATCH -p +#SBATCH -A +#SBATCH -N +#SBATCH -n # Recommend number of sockets +#SBATCH -c # for OpenMP, NOTE recommend number of cores per socket +#SBATCH -t 3:0:0 +#SBATCH --exclusive +#SBATCH -o "trove.%j.out" + +# This batch script runs TROVE-MPI using hybrid MPI/OpenMP. For best +# performance, it is recommended to run one MPI task per socket, and OpenMP threads per task. + +module purge +module load rhel7/default-peta4 +module unload intel/bundles/complib/2017.4 +module load intel/bundles/complib/2019.3 + +# Set to number of cores per socket +export OMP_NUM_THREADS=16 + +# Prevent processes from migrating and ensure each task + threads is exclusive +# to one socket. +export I_MPI_PIN=1 +export I_MPI_PIN_DOMAIN=omp:compact + + +mpirun trove.x file1.inp diff --git a/makefile b/makefile index bbcd01d..cc25a32 100644 --- a/makefile +++ b/makefile @@ -11,9 +11,9 @@ checkin: pot_user = pot_ch4 PLAT = _2205_i17 -###FOR = ifort +#FOR = ifort FOR = mpif90 -FFLAGS = -qopenmp -xcore-avx2 -O3 -ip +FFLAGS = -qopenmp -xHost -O3 -ip #FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 From 75a7220a2b1c301a532dfc90aa0684078dfbbfc4 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 3 Jun 2019 14:25:39 +0100 Subject: [PATCH 11/79] Commented out some unfinished test code --- mpi_aux.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 70dae59..7140e08 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -263,7 +263,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) call co_create_type(dimen) deallocate(starts,ends) - call co_init_pblas() + !call co_init_pblas() distr_inited = .true. end subroutine co_init_distr From 4cff7f4b6320c940c797e060e39b0d6c8bfae2e2 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 14 Jun 2019 14:19:17 +0100 Subject: [PATCH 12/79] Fix issues w/ parallel I/O, avoids double write in tran.f90 and makes sure data is distributed on read in perturbation.f90 --- mpi_aux.f90 | 80 ++++++++++++----------- perturbation.f90 | 163 ++++++++++++++++++++++++++++++++--------------- tran.f90 | 73 +++++++++++---------- 3 files changed, 194 insertions(+), 122 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 7140e08..e3e818c 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -5,7 +5,6 @@ module mpi_aux implicit none public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr, co_read_matrix_distr - public co_create_type public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim @@ -179,16 +178,22 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) integer :: i, ierr, to_calc if (.not. comms_inited) stop "COMMS NOT INITIALISED" - if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" + !if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" proc_index = mpi_rank+1 - allocate(proc_sizes(comm_size),proc_offsets(comm_size),send_or_recv(comm_size),starts(comm_size),ends(comm_size),stat=ierr) - if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" + if (.not. distr_inited) then + allocate(proc_sizes(comm_size),proc_offsets(comm_size),send_or_recv(comm_size),starts(comm_size),ends(comm_size),stat=ierr) + if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" + else + allocate(starts(comm_size),ends(comm_size),stat=ierr) + endif if (comm_size .eq. 1) then startdim = 1 enddim = dimen + co_startdim = 1 + co_enddim = dimen blocksize = dimen*dimen send_or_recv(1) = 0 else @@ -231,7 +236,9 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) co_startdim = startdim co_enddim = enddim - allocate(mpi_blocktype(comm_size)) + if(.not. distr_inited) then + allocate(mpi_blocktype(comm_size)) + endif do i=1,comm_size if (mod(comm_size,2).eq.1) then @@ -260,7 +267,12 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) endif - call co_create_type(dimen) + if (comm_size .eq. 1) then + call co_create_type_column(dimen,dimen,dimen) + else + call co_create_type_column(dimen,comm_size*(int(1+real(dimen/comm_size))),enddim-startdim+1) + endif + deallocate(starts,ends) !call co_init_pblas() @@ -280,6 +292,8 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) integer :: i, icoeff, jcoeff, offset, ierr, k type(MPI_Request) :: reqs(comm_size) + if (comm_size.eq.1) return + call TimerStart('MPI_transpose') call TimerStart('MPI_transpose_sendrecv') @@ -329,22 +343,19 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) integer(kind=MPI_Offset_kind) :: offset_start,offset_end integer :: readcount, mpi_real_size, ierr - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + if (comm_size.gt.1) then + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + + offset_start = (lb-1)*longdim*mpi_real_size + offset_end = (longdim-ub)*longdim*mpi_real_size - if (mpi_rank .lt. (comm_size-1)) then - readcount = int(1+real(longdim/comm_size)) + call MPI_File_seek(infile, offset_start, MPI_SEEK_CUR) + call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) + call MPI_File_seek(infile, offset_end, MPI_SEEK_CUR) else - readcount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) + call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) endif - offset_start = mpi_rank * (longdim * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_real_size) - offset_end = longdim - offset_end = (offset_end * offset_end * mpi_real_size) - offset_start - - call MPI_File_seek(infile, offset_start, MPI_SEEK_CUR) - call MPI_File_read_all(infile,x,readcount,mpitype_column,writestat,ierr) - call MPI_File_seek(infile, offset_end, MPI_SEEK_CUR) - end subroutine co_read_matrix_distr subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) @@ -353,8 +364,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub type(MPI_File),intent(in) :: outfile - integer :: ierr, mpi_real_size, writecount, mpi_col_size - !integer(kind=MPI_Offset_kind) :: mpioffset,mpi_write_offsetkind + integer :: ierr, mpi_real_size integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat @@ -362,33 +372,31 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call TimerStart('MPI_write') - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - call MPI_Type_size(mpitype_column, mpi_col_size,ierr) + if (comm_size.gt.1) then + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - if (mpi_rank .lt. (comm_size-1)) then - writecount = int(1+real(longdim/comm_size)) + offset_start = (lb-1)*longdim*mpi_real_size + offset_end = 0 + + call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) + call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) + call MPI_File_seek(outfile, offset_end, MPI_SEEK_END) else - writecount = longdim-((comm_size-1)*int(1+real(longdim/comm_size))) + call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) endif - offset_start = mpi_rank * int(1+real(longdim/comm_size),mpi_offset_kind) * mpi_col_size - offset_end = 0 - - call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) - call MPI_File_write_all(outfile,x,writecount,mpitype_column,writestat,ierr) - call MPI_File_seek(outfile, offset_end, MPI_SEEK_END) call TimerStop('MPI_write') end subroutine co_write_matrix_distr - subroutine co_create_type(extent) - integer, intent(in) :: extent - integer :: ierr + subroutine co_create_type_column(extent, blocksize, ncols) + integer, intent(in) :: extent, blocksize, ncols + integer :: ierr,writecount - call MPI_Type_contiguous(extent, mpi_double_precision, mpitype_column, ierr) + call MPI_Type_vector(ncols, extent, blocksize, mpi_double_precision, mpitype_column, ierr) call MPI_Type_commit(mpitype_column, ierr) - end subroutine co_create_type + end subroutine co_create_type_column subroutine co_create_type_subarray(extent, coldim, rowdim, blockid, mpi_newtype) integer,intent(in) :: extent, coldim, rowdim, blockid diff --git a/perturbation.f90 b/perturbation.f90 index aaa97d3..c436826 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6532,7 +6532,9 @@ subroutine PThamiltonian_contract(jrot) character(len=cl) :: unitfname,filename,statusf,symchar logical :: only_store = .false. logical :: no_diagonalization = .false. + !AT type(MPI_File) :: mpiiofile + integer :: startdim,enddim,localrootsize ! ! A special case when the diagonlization is to be skipped ! @@ -6851,7 +6853,6 @@ subroutine PThamiltonian_contract(jrot) task = 'top' !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & ncontr,maxcontr) ! ! We have two calculation options: fast and cheap and slow but expensive. @@ -7089,6 +7090,11 @@ subroutine PThamiltonian_contract(jrot) if (FLrotation.and.jrot/=0.and..not.job%vib_rot_contr) then ! if (job%verbose>=4) write(out,"(' Rotational part...')") + ! + ! AT - Initialise parallel distribution (if not done yet) + ! TODO - find correct spot to place this w/ appropriate guard clauses + ! NOTE don't know `ncontr` until after calling PTrestore.. with `task=='top'` + call co_init_distr(ncontr, startdim, enddim, localrootsize) ! task = 'rot' ! @@ -7729,7 +7735,7 @@ subroutine close_chkptfile_mpi(fileh) endif end subroutine close_chkptfile_mpi - subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & + subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & !PT, PTvibrational_me_calc,grot,gcor,hvib, & ncontr, maxcontr, icontr) use mpi_f08 @@ -7758,6 +7764,9 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & real(rk) :: f_t integer :: k1,k2,islice + !AT + integer :: localmatrix_x,localmatrix_y + integer(hik) :: localrootsize if ( trim(job%IOkinet_action)/='VIB_READ'.and.trim(job%IOkinet_action)/='READ'.and..not.job%contrci_me_fast) return @@ -7772,11 +7781,18 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !dimen = max(min(int(PT%Maxcontracts*job%compress),PT%Maxcontracts),1) call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - call MPI_Type_size(mpi_integer, mpi_real_size,ierr) + call MPI_Type_size(mpi_integer, mpi_int_size,ierr) if (mpi_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i9)") icontr - if (mpi_rank .eq. 0) write(*,*) "DEBUG|TASK =", task + !AT - determine task-local matrix dimensions + if (comm_size.eq.1) then + localmatrix_y = ncontr + else + localmatrix_y = int(1+real(ncontr/comm_size))*comm_size + endif + localmatrix_x = co_enddim-co_startdim+1 + localrootsize = int(localmatrix_x*localmatrix_y,hik) select case (trim(task)) case('top') @@ -7787,7 +7803,6 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & !TODO set filename dynamically filename = trim(job%matelem_suffix)//'.chk' - write(*,*) "FILENAME", filename call open_chkptfile_mpi(fileh, filename, 'read') @@ -7853,7 +7868,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & file_offset = (PT%Nclasses+1)*ncontr*mpi_int_size call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) - deallocate(imat_t) + !deallocate(imat_t) else @@ -7930,7 +7945,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! ! Read the rotational part only ! - allocate(mat_(maxcontr,maxcontr),stat=ierr) + !allocate(mat_(maxcontr,maxcontr),stat=ierr) call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) ! if (.not.job%IOmatelem_split) then @@ -7946,6 +7961,28 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! allocate(grot(3,3),stat=ierr) ! + !islice = 0 + ! + !do k1 = 1,3 + ! ! + ! do k2 = 1,3 + ! ! + ! islice = islice + 1 + ! ! + ! call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) + ! ! + ! allocate(grot(k1,k2)%me(maxcontr,maxcontr),stat=ierr) + ! call ArrayStart('grot-matrix',ierr,1,kind(f_t),rootsize2_) + ! ! + ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! ! + ! grot(k1,k2)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + ! ! + ! call divided_slice_close_mpi(islice,fileh_slice,'g_rot') + ! ! + ! enddo + !enddo + ! islice = 0 ! do k1 = 1,3 @@ -7956,19 +7993,16 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) ! - allocate(grot(k1,k2)%me(maxcontr,maxcontr),stat=ierr) - call ArrayStart('grot-matrix',ierr,1,kind(f_t),rootsize2_) + allocate(grot(k1,k2)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('grot-matrix',ierr,1,kind(f_t),localrootsize) ! - call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! - grot(k1,k2)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + call co_read_matrix_distr(grot(k1,k2)%me, ncontr, co_startdim, co_enddim, fileh) ! call divided_slice_close_mpi(islice,fileh_slice,'g_rot') ! enddo enddo - ! - deallocate(mat_) + !deallocate(mat_) call ArrayStop('PThamiltonian_contract: mat_') ! if (job%verbose>=4) write(out,"(' ...done!')") @@ -7977,8 +8011,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' Read and process Coriolis part...')") ! - allocate(mat_(maxcontr,maxcontr),stat=ierr) - call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + !allocate(mat_(maxcontr,maxcontr),stat=ierr) + !call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) ! if (.not.job%IOmatelem_split) then ! @@ -7995,25 +8029,40 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! islice = 9 ! + !do k1 = 1,3 + ! ! + ! islice = islice + 1 + ! ! + ! allocate(gcor(k1)%me(maxcontr,maxcontr),stat=ierr) + ! call ArrayStart('gcor-matrix',ierr,1,kind(f_t),rootsize2_) + ! ! + ! call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) + ! ! + ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! ! + ! gcor(k1)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + ! ! + ! call divided_slice_close_mpi(islice,fileh_slice,'g_cor') + ! ! + !enddo + ! do k1 = 1,3 ! islice = islice + 1 ! - allocate(gcor(k1)%me(maxcontr,maxcontr),stat=ierr) - call ArrayStart('gcor-matrix',ierr,1,kind(f_t),rootsize2_) + allocate(gcor(k1)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('gcor-matrix',ierr,1,kind(f_t),localrootsize) ! call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) ! - call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! - gcor(k1)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + call co_read_matrix_distr(gcor(k1)%me, ncontr, co_startdim, co_enddim, fileh) ! call divided_slice_close_mpi(islice,fileh_slice,'g_cor') ! enddo ! - deallocate(mat_) - call ArrayStop('PThamiltonian_contract: mat_') + !deallocate(mat_) + !call ArrayStop('PThamiltonian_contract: mat_') ! if (mpi_rank .eq. 0 .and. job%verbose>=4) write(out,"(' ...done!')") ! @@ -8023,8 +8072,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & ! if (.not.job%IOmatelem_split.and.( (.not.FLrotation.or.jrot==0).and.trim(job%IOkinet_action)/='VIB_READ' ) ) then ! - allocate(mat_t(maxcontr,maxcontr),stat=ierr) - call ArrayStart('PThamiltonian_contract: mat_t',ierr,1,kind(mat_t),rootsize2_) + !allocate(mat_t(maxcontr,maxcontr),stat=ierr) + !call ArrayStart('PThamiltonian_contract: mat_t',ierr,1,kind(mat_t),rootsize2_) ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_rot') then @@ -8033,13 +8082,15 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! - do k1 = 1,3 - do k2 = 1,3 - ! - call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! - enddo - enddo + !do k1 = 1,3 + ! do k2 = 1,3 + ! ! + ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! ! + ! enddo + !enddo + file_offset = 9*ncontr*ncontr*mpi_real_size + call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:5)/='g_cor') then @@ -8048,16 +8099,18 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! - !do k1 = 1,PT%Nmodes - do k2 = 1,3 - ! - call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! - enddo + !!do k1 = 1,PT%Nmodes + !do k2 = 1,3 + ! ! + ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + ! ! !enddo + !!enddo + file_offset = 3*ncontr*ncontr*mpi_real_size + call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! - deallocate(mat_t) - call ArrayStop('mat_t') + !deallocate(mat_t) + !call ArrayStop('mat_t') ! endif ! @@ -8074,18 +8127,19 @@ subroutine PTrestore_rot_kinetic_matrix_elements_MPI(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - hvib or End missing' end if ! - allocate(mat_(maxcontr,maxcontr),stat=ierr) - call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + !allocate(mat_(maxcontr,maxcontr),stat=ierr) + !call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) ! - allocate(hvib%me(maxcontr,maxcontr),stat=ierr) - call ArrayStart('hvib-matrix',ierr,1,kind(f_t),rootsize2_) + allocate(hvib%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('hvib-matrix',ierr,1,kind(f_t),localrootsize) ! - call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) + call co_read_matrix_distr(hvib%me, ncontr, co_startdim, co_enddim, fileh) + !call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) ! - hvib%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) + !hvib%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) ! - deallocate(mat_) - call ArrayStop('PThamiltonian_contract: mat_') + !deallocate(mat_) + !call ArrayStop('PThamiltonian_contract: mat_') ! call MPI_File_read_all(fileh, readbuf, 16, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:16)/='End Kinetic part') then @@ -9834,7 +9888,6 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_root ! ! diagonalization schemes ! - write(*,*) "DIAGONALIZER", trim(job%diagonalizer) select case (trim(job%diagonalizer)) ! case default @@ -15290,11 +15343,15 @@ subroutine PTcontracted_matelem_class(jrot) rootsize = int(mdimen*mdimen,hik) call co_init_distr(mdimen, startdim, enddim, blocksize_) allocate(reqs(comm_size)) - write(*,*) "SENDRECV:", mpi_rank, send_or_recv + !write(*,*) "SENDRECV:", mpi_rank, send_or_recv, startdim, enddim - mdimen_p = int(1+real(mdimen/comm_size)) - mdimen_b = comm_size*mdimen_p - !DEBUG!write(*,*) "DIMS", mpi_rank, mdimen, mdimen_b, mdimen_p, comm_size*mdimen_b*mdimen_p + if (comm_size .gt. 1) then + mdimen_p = int(1+real(mdimen/comm_size)) + mdimen_b = comm_size*mdimen_p + else + mdimen_p = mdimen + mdimen_b = mdimen + endif blocksize = blocksize_ ! ! The vibrational (J=0) matrix elements of the rotational and coriolis diff --git a/tran.f90 b/tran.f90 index 6534ef5..45f56d2 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1403,23 +1403,28 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='Eigen-vib. matrix elements of the rot. kinetic part' call IOStart(trim(job_is),chkptIO) ! - !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) - !write(chkptIO) 'Start Kinetic part' - call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) - call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) - mpioffset = 0 - call MPI_File_set_size(fileh_w, mpioffset, ierr) - call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) - call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) - ! - treat_vibration = .false. - ! - !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) - call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) - ! - if (job%vib_rot_contr) then - !write(chkptIO) 'vib-rot' - call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) + !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) + !write(chkptIO) 'Start Kinetic part' + call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) + mpioffset = 0 + call MPI_File_set_size(fileh_w, mpioffset, ierr) + if(mpi_rank .eq. 0) then + call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) + ! + treat_vibration = .false. + ! + !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) + call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) + ! + if (job%vib_rot_contr) then + !write(chkptIO) 'vib-rot' + call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) + endif + else + mpioffset = 0 + treat_vibration = .false. endif ! endif @@ -1459,7 +1464,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) task = 'rot' ! !write(chkptIO) 'g_rot' - call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) ! !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) @@ -1511,16 +1516,16 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - call divided_slice_write(islice,'g_rot',job%j0matelem_suffix,Neigenroots,mat_s) + if(mpi_rank.eq.0) call divided_slice_write(islice,'g_rot',job%j0matelem_suffix,Neigenroots,mat_s) ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! - call divided_slice_write_vibrot(islice,job%j0matelem_suffix,Neigenroots,mat_s) + if(mpi_rank.eq.0) call divided_slice_write_vibrot(islice,job%j0matelem_suffix,Neigenroots,mat_s) ! else ! !write (chkptIO) mat_s - call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1541,7 +1546,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! !write(chkptIO) 'g_cor' - call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) ! endif ! @@ -1584,16 +1589,16 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - call divided_slice_write(islice,'g_cor',job%j0matelem_suffix,Neigenroots,mat_s) + if(mpi_rank.eq.0) call divided_slice_write(islice,'g_cor',job%j0matelem_suffix,Neigenroots,mat_s) ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! - call divided_slice_write_vibrot(islice,job%j0matelem_suffix,Neigenroots,mat_s) + if(mpi_rank.eq.0) call divided_slice_write_vibrot(islice,job%j0matelem_suffix,Neigenroots,mat_s) ! else ! !write (chkptIO) mat_s - call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1604,7 +1609,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! !if (.not.job%IOmatelem_split.or.job%iswap(1)==1) write(chkptIO) 'End Kinetic part' - if (.not.job%IOmatelem_split.or.job%iswap(1)==1) call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) + if ((.not.job%IOmatelem_split.or.job%iswap(1)==1).and.(mpi_rank.eq.0)) call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) ! if (.not.job%vib_rot_contr) then !close(chkptIO,status='keep') @@ -1692,13 +1697,15 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) mpioffset = 0 call MPI_File_set_size(fileh_w, mpioffset, ierr) + if(mpi_rank.eq.0) then call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) call MPI_File_write(fileh_w, 'Start external field', 20, mpi_character, mpi_status_ignore, ierr) + endif ! ! store the matrix elements ! !write(chkptIO) Neigenroots - call MPI_File_write(fileh_w, Neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, Neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) ! endif ! @@ -1769,12 +1776,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !write(chkptIO) imu !write(chkptIO) mat_s - call MPI_File_write_all(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) - call MPI_File_write_all(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) ! else ! - call divided_slice_write(imu,'extF',job%j0extmat_suffix,Neigenroots,mat_s) + if(mpi_rank.eq.0) call divided_slice_write(imu,'extF',job%j0extmat_suffix,Neigenroots,mat_s) ! endif ! @@ -1803,7 +1810,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! !write(chkptIO) 'End external field' - call MPI_File_write_all(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) !close(chkptIO,status='keep') call MPI_File_close(fileh_w, ierr) ! @@ -2312,12 +2319,12 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ !read(chkptIO) buf18 call MPI_File_read_all(fileh, buf18, 7, mpi_character, mpi_status_ignore, ierr) if (buf18(1:7)/='[MPIIO]') then - write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") 'mpiiofile',buf18 + write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") job%kinetmat_file,buf18 stop 'PTcontracted_matelem_class - Not an MPIIO file' end if call MPI_File_read_all(fileh, buf18, 18, mpi_character, mpi_status_ignore, ierr) if (buf18/='Start Kinetic part') then - write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus header: ',a)") 'mpiiofile',buf18 + write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus header: ',a)") job%kinetmat_file,buf18 stop 'PTcontracted_matelem_class - bogus file format' end if ! From 0d17cbc1991ea59e07194b392a4e1be6f0a761c7 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Thu, 20 Jun 2019 10:06:35 +0100 Subject: [PATCH 13/79] [mpi_aux.f90] Fixed blacs implementation + cleanup [perturbation.f90] mpi_aux compatibility update --- mpi_aux.f90 | 171 ++++++++++++++++++++--------------------------- perturbation.f90 | 5 +- 2 files changed, 74 insertions(+), 102 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index e3e818c..b0a94db 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -5,32 +5,26 @@ module mpi_aux implicit none public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr, co_read_matrix_distr + public co_block_type_init public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim public blacs_size, blacs_rank, blacs_ctxt public nprow,npcol,myprow,mypcol, desca,descb,descc + public mpi_real_size, mpi_int_size interface co_sum module procedure :: co_sum_double end interface - !interface co_max - ! module procedure :: co_max_double - !end interface - - interface co_gather - module procedure :: co_gather_double - module procedure :: co_gatherv_double - end interface - integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv integer :: comm_size, mpi_rank integer :: co_startdim, co_enddim logical :: comms_inited = .false., distr_inited=.false. type(MPI_Datatype) :: mpitype_column type(MPI_Datatype),dimension(:), allocatable :: mpi_blocktype + integer :: mpi_real_size, mpi_int_size !blacs/pblas integer :: blacs_size, blacs_rank, blacs_ctxt @@ -39,111 +33,93 @@ module mpi_aux integer :: descb(9) integer :: descc(9) integer :: descd(9) + integer,dimension(2) :: blacs_dims contains - subroutine co_init_pblas() + subroutine co_init_blacs() + implicit none + + if (.not. comms_inited) stop "CO_INIT_BLACS COMMS NOT INITED" + + ! Must be initialised to zero - if stack contains garbage here MPI_Dims_create WILL fail + blacs_dims = 0 + call blacs_pinfo(blacs_rank, blacs_size) if (blacs_rank .lt. 0) return + call MPI_Dims_create(blacs_size, 2, blacs_dims) + call blacs_get(-1, 0, blacs_ctxt) - call blacs_gridinit(blacs_ctxt, 'R', blacs_size/8, 8) + call blacs_gridinit(blacs_ctxt, 'R', blacs_dims(1), blacs_dims(2)) call blacs_gridinfo(blacs_ctxt, nprow, npcol, myprow, mypcol) - write(*,"('BLACS: [',i2,',',i2'](',i4,i4,i4,i4',)')") mpi_rank,blacs_rank,nprow,npcol,myprow,mypcol - end subroutine co_init_pblas + !write(*,"('BLACS: [',i2,',',i2'](',i4,i4,i4,i4',)')") mpi_rank,blacs_rank,nprow,npcol,myprow,mypcol + end subroutine co_init_blacs + + subroutine co_block_type_init(smat, dimx, dimy, descr, mpi_type) + implicit none + + real(rk),intent(out),dimension(:,:),allocatable :: smat + + integer,intent(in) :: dimx, dimy + integer,intent(out),dimension(9) :: descr + + type(MPI_Datatype),intent(out),optional :: mpi_type + + + integer,dimension(2) :: global_size, distr, dargs + integer :: MB,NB,MLOC,NLOC,ierr + + integer,external :: NUMROC + + if (.not. comms_inited) stop "CO_BLOCK_TYPE_INIT COMMS NOT INITED" + + MB = dimx/nprow + NB = dimy/npcol + MLOC = NUMROC( dimx, MB, myprow, 0, nprow ) + NLOC = NUMROC( dimy, NB, mypcol, 0, npcol ) + call DESCINIT(descr, dimx, dimy, MB, NB, 0, 0, blacs_ctxt, max(MLOC,1), ierr) + + allocate(smat(MLOC,NLOC)) + + if (present(mpi_type)) then + global_size = (/dimx, dimy/) + distr = (/MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC/) + dargs = (/MB, NB/) + call MPI_Type_create_darray(blacs_size, blacs_rank, 2, global_size, distr, dargs, blacs_dims, & + MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, mpi_type, ierr) + call MPI_Type_commit(mpi_type, ierr) + endif + + end subroutine co_block_type_init + + subroutine co_sum_double(x, root_process) + use accuracy - subroutine co_sum_double(x, result_image) - real*8, intent(inout), dimension(:,:) :: x - integer, optional :: result_image + implicit none + + real(rk), intent(inout), dimension(:,:) :: x + integer, optional :: root_process integer :: i !integer, save :: result_image_mpi[*] if (comm_size.eq.1) return call TimerStart('co_sum_double') - !if (present(result_image)) then - - !if (this_image() .eq. 1) then - ! call mpi_comm_rank(mpi_comm_world, result_image_mpi) - ! do i = 2, num_images() - ! result_image_mpi[i] = result_image_mpi - ! end do - !end if - !sync all + if (present(root_process)) then if (mpi_rank .eq. 0) then call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) else call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) endif - !else - ! call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, mpi_comm_world) - !end if - call TimerStop('co_sum_double') - end subroutine - - !subroutine co_max_double(x, result_image) - ! real*8, intent(inout), dimension(:,:) :: x - ! integer, optional :: result_image - ! integer :: i - ! integer, save :: result_image_mpi[*] - - ! call TimerStart('co_max_double') - - ! if (present(result_image)) then - - ! if (this_image() .eq. 1) then - ! call mpi_comm_rank(mpi_comm_world, result_image_mpi) - ! do i = 2, num_images() - ! result_image_mpi[i] = result_image_mpi - ! end do - ! end if - ! sync all - - ! if (this_image() .eq. 1) then - ! call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_max, result_image_mpi, mpi_comm_world) - ! else - ! call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_max, result_image_mpi, mpi_comm_world) - ! endif - ! else - ! call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_max, mpi_comm_world) - ! end if - ! call TimerStop('co_max_double') - !end subroutine - - subroutine co_gather_double(x, static) - real*8, intent(inout), dimension(:,:) :: x - logical, intent(in) :: static - integer :: ierr - - if (.not. comms_inited .or. .not. distr_inited) stop "COMMS NOT INITIALISED" - if (comm_size .eq. 1) return - - call TimerStart('CO_GATHER_DOUBLE') - call mpi_gather(x, 0, mpi_double_precision, x, proc_sizes(2), mpi_double_precision, 0, mpi_comm_world) - if (ierr .gt. 0) stop "co_gather_double" - call TimerStop('CO_GATHER_DOUBLE') - - end subroutine co_gather_double - - subroutine co_gatherv_double(x) - real*8, intent(inout), dimension(:,:) :: x - integer :: ierr - - if (.not. comms_inited .or. .not. distr_inited) stop "COMMS NOT INITIALISED" - if (comm_size .eq. 1) return - - call TimerStart('CO_GATHERV_DOUBLE') - if (mpi_rank.eq.0) then - call mpi_gatherv(x, 0, mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) else - call mpi_gatherv(x, size(x), mpi_double_precision, x, proc_sizes, proc_offsets, mpi_double_precision, 0, mpi_comm_world) - endif - if (ierr .gt. 0) stop "co_gatherv_double" - call TimerStop('CO_GATHERV_DOUBLE') + call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, mpi_comm_world) + end if - end subroutine co_gatherv_double + call TimerStop('co_sum_double') + end subroutine subroutine co_init_comms() integer :: ierr @@ -155,8 +131,13 @@ subroutine co_init_comms() call mpi_comm_rank(mpi_comm_world, mpi_rank, ierr) if (ierr .gt. 0) stop "MPI_COMM_RANK" + call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) + call MPI_Type_size(mpi_integer, mpi_int_size,ierr) + comms_inited = .true. + call co_init_blacs() + end subroutine co_init_comms subroutine co_finalize_comms() @@ -275,8 +256,6 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) deallocate(starts,ends) - !call co_init_pblas() - distr_inited = .true. end subroutine co_init_distr @@ -341,11 +320,9 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) type(MPI_File),intent(in) :: infile type(MPI_Status) :: writestat integer(kind=MPI_Offset_kind) :: offset_start,offset_end - integer :: readcount, mpi_real_size, ierr + integer :: readcount, ierr if (comm_size.gt.1) then - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - offset_start = (lb-1)*longdim*mpi_real_size offset_end = (longdim-ub)*longdim*mpi_real_size @@ -364,7 +341,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub type(MPI_File),intent(in) :: outfile - integer :: ierr, mpi_real_size + integer :: ierr integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat @@ -373,8 +350,6 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call TimerStart('MPI_write') if (comm_size.gt.1) then - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - offset_start = (lb-1)*longdim*mpi_real_size offset_end = 0 diff --git a/perturbation.f90 b/perturbation.f90 index c436826..237364d 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7757,7 +7757,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & type(MPI_File) :: fileh_slice character(len=cl) :: job_id,filename,readbuf integer(kind=MPI_Offset_kind) :: file_offset - integer :: ierr, mpi_real_size, mpi_int_size + integer :: ierr integer(hik) :: rootsize,rootsize_,rootsize2,rootsize2_,nprocs,tid,icontr1,icontr2 integer(ik),allocatable :: imat_t(:,:) real(rk),allocatable :: mat_t(:,:),mat_(:,:) @@ -7780,9 +7780,6 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & ! !dimen = max(min(int(PT%Maxcontracts*job%compress),PT%Maxcontracts),1) - call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) - call MPI_Type_size(mpi_integer, mpi_int_size,ierr) - if (mpi_rank .eq. 0 .and. (job%verbose>=6.and.present(icontr)) ) write(out,"('icontr = ',i9)") icontr !AT - determine task-local matrix dimensions From 08c1c06314a1e2129c0f35021c42de32025ba7e9 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Thu, 20 Jun 2019 11:06:49 +0100 Subject: [PATCH 14/79] [mpi_aux.f90] return allocation status in co_block_type_init --- mpi_aux.f90 | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index b0a94db..0c0275b 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -57,17 +57,17 @@ subroutine co_init_blacs() !write(*,"('BLACS: [',i2,',',i2'](',i4,i4,i4,i4',)')") mpi_rank,blacs_rank,nprow,npcol,myprow,mypcol end subroutine co_init_blacs - subroutine co_block_type_init(smat, dimx, dimy, descr, mpi_type) + subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) implicit none real(rk),intent(out),dimension(:,:),allocatable :: smat integer,intent(in) :: dimx, dimy integer,intent(out),dimension(9) :: descr + integer,intent(out) :: allocinfo type(MPI_Datatype),intent(out),optional :: mpi_type - integer,dimension(2) :: global_size, distr, dargs integer :: MB,NB,MLOC,NLOC,ierr @@ -81,7 +81,8 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, mpi_type) NLOC = NUMROC( dimy, NB, mypcol, 0, npcol ) call DESCINIT(descr, dimx, dimy, MB, NB, 0, 0, blacs_ctxt, max(MLOC,1), ierr) - allocate(smat(MLOC,NLOC)) + allocate(smat(MLOC,NLOC), stat=allocinfo) + if(allocinfo) return if (present(mpi_type)) then global_size = (/dimx, dimy/) From 727916c65cab13b5fa4c91b8d6bf259678646eb9 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Thu, 20 Jun 2019 11:09:36 +0100 Subject: [PATCH 15/79] [tran.f90] Implement Parallel DGEMM to enable MPI/memory scaling (single-file only for now) --- tran.f90 | 347 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 262 insertions(+), 85 deletions(-) diff --git a/tran.f90 b/tran.f90 index 45f56d2..827ee61 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1215,8 +1215,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) double precision,parameter :: alpha = 1.0d0,beta=0.0d0 character(len=cl) :: jchar,filename type(MPI_File) :: fileh, fileh_w - integer(kind=MPI_OFFSET_KIND) :: mpioffset + integer(kind=MPI_OFFSET_KIND) :: mpioffset,read_offset,write_offset integer :: ierr + + type(MPI_Datatype) :: gmat_block_type, psi_block_type, mat_t_block_type, mat_s_block_type, extF_block_type + integer,dimension(9) :: desc_gmat, desc_mat_t, desc_mat_s, desc_psi, desc_extF + integer :: blacs_row, blacs_col, i_local, j_local + integer :: i, j ! if (job%verbose>=2) write(out,"(/'Compute J=0 vib. matrix elements of the kinetic energy operator...')") ! @@ -1251,17 +1256,31 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! dimen = bset_contr(1)%Maxcontracts ! - allocate(mat_s(Neigenroots,Neigenroots),stat=info) - call ArrayStart('mat_s',info,1,kind(mat_s),matsize2) + if (blacs_size.eq.1) then + allocate(mat_s(Neigenroots,Neigenroots),stat=info) + call ArrayStart('mat_s',info,1,kind(mat_s),matsize2) + else + call co_block_type_init(mat_s, Neigenroots, Neigenroots, desc_mat_s, info, mat_s_block_type) + call ArrayStart('mat_s',info,1,kind(mat_s),int(size(mat_s),hik)) + endif ! matsize = int(dimen*Neigenroots,hik) ! if (job%verbose>=3) write(out,"(/' Allocate two matrices of ',i8,'x',i8,' = ',i0,' elements.')") & Neigenroots,Neigenroots,matsize ! - allocate(psi(dimen,Neigenroots),mat_t(Neigenroots,dimen),stat=info) - call ArrayStart('psi',info,1,kind(psi),matsize) - call ArrayStart('mat_t',info,1,kind(mat_t),matsize) + if (blacs_size.eq.1) then + allocate(psi(dimen,Neigenroots),mat_t(Neigenroots,dimen),stat=info) + ! + call ArrayStart('psi',info,1,kind(psi),matsize) + call ArrayStart('mat_t',info,1,kind(mat_t),matsize) + else + call co_block_type_init(psi, Neigenroots, dimen, desc_psi, info) + call ArrayStart('psi',info,1,kind(psi),int(size(psi),hik)) + ! + call co_block_type_init(mat_t, dimen, Neigenroots, desc_mat_t, info) + call ArrayStart('mat_t',info,1,kind(mat_t),int(size(mat_t),hik)) + endif ! psi = 0 ! @@ -1315,41 +1334,72 @@ subroutine TRconvert_matel_j0_eigen(jrot) iroot = 0 ! do ilevel = 1,Neigenlevels - ! - igamma = eigen(ilevel)%igamma - iunit = TReigenvec_unit(1,(/0/),igamma) - ! - irec = eigen(ilevel)%irec(1) - ! - Nsize = bset_contr(1)%nsize(igamma) - ! - read(iunit, rec = irec) vec(1:Nsize) - ! - do ideg = 1, eigen(ilevel)%ndeg - ! - iroot = iroot + 1 - ! - !$omp parallel do private(icoeff,irow,ib,iterm,ielem) shared(vec) schedule(dynamic) - do icoeff = 1,dimen - ! - psi(icoeff,iroot) = 0 - ! - irow = bset_contr(1)%icontr2icase(icoeff,1) - ib = bset_contr(1)%icontr2icase(icoeff,2) - ! - iterm = ijterm(irow,igamma) - ! - do ielem = 1,bset_contr(1)%irr(igamma)%N(irow) + ! + igamma = eigen(ilevel)%igamma + iunit = TReigenvec_unit(1,(/0/),igamma) + ! + irec = eigen(ilevel)%irec(1) + ! + Nsize = bset_contr(1)%nsize(igamma) + ! + read(iunit, rec = irec) vec(1:Nsize) + ! + if(blacs_size.gt.1) then + do ideg = 1, eigen(ilevel)%ndeg + ! + iroot = iroot + 1 + ! + ! $omp parallel do private(icoeff,irow,ib,iterm,ielem) shared(vec) schedule(dynamic) + do icoeff = 1,dimen + ! + call infog2l(icoeff,iroot,desc_psi,nprow,npcol,myprow,mypcol,i_local,j_local,blacs_row,blacs_col) + if (myprow.eq.blacs_row.and.mypcol.eq.blacs_col) then ! - psi(icoeff,iroot) = psi(icoeff,iroot) + vec(iterm+ielem)*bset_contr(1)%irr(igamma)%repres(iterm+ielem,ideg,ib) + psi(i_local,j_local) = 0 ! - enddo - ! - enddo - !$omp end parallel do - ! - end do - ! + irow = bset_contr(1)%icontr2icase(icoeff,1) + ib = bset_contr(1)%icontr2icase(icoeff,2) + ! + iterm = ijterm(irow,igamma) + ! + do ielem = 1,bset_contr(1)%irr(igamma)%N(irow) + ! + psi(i_local,j_local) = psi(i_local,j_local) + vec(iterm+ielem)*bset_contr(1)%irr(igamma)%repres(iterm+ielem,ideg,ib) + ! + enddo + endif + ! + enddo + ! $omp end parallel do + ! + end do + else + do ideg = 1, eigen(ilevel)%ndeg + ! + iroot = iroot + 1 + ! + !$omp parallel do private(icoeff,irow,ib,iterm,ielem) shared(vec) schedule(dynamic) + do icoeff = 1,dimen + ! + psi(icoeff,iroot) = 0 + ! + irow = bset_contr(1)%icontr2icase(icoeff,1) + ib = bset_contr(1)%icontr2icase(icoeff,2) + ! + iterm = ijterm(irow,igamma) + ! + do ielem = 1,bset_contr(1)%irr(igamma)%N(irow) + ! + psi(icoeff,iroot) = psi(icoeff,iroot) + vec(iterm+ielem)*bset_contr(1)%irr(igamma)%repres(iterm+ielem,ideg,ib) + ! + enddo + ! + enddo + !$omp end parallel do + ! + end do + endif + ! end do ! deallocate(vec) @@ -1364,24 +1414,55 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! iroot = 0 ! - do ilevel = 1,Neigenlevels - ! - igamma = eigen(ilevel)%igamma - iunit = TReigenvec_unit(1,(/0/),igamma) - ! - do ideg = 1, eigen(ilevel)%ndeg - ! - iroot = iroot + 1 - ! - iroot = eigen(ilevel)%iroot(ideg) + if (blacs_size.eq.1) then + do ilevel = 1,Neigenlevels ! - irec = eigen(ilevel)%irec(ideg) + igamma = eigen(ilevel)%igamma + iunit = TReigenvec_unit(1,(/0/),igamma) ! - read(iunit, rec = irec) psi(1:dimen,iroot) + do ideg = 1, eigen(ilevel)%ndeg + ! + iroot = iroot + 1 + ! + iroot = eigen(ilevel)%iroot(ideg) + ! + irec = eigen(ilevel)%irec(ideg) + ! + read(iunit, rec = irec) psi(1:dimen,iroot) + ! + enddo ! - enddo - ! - enddo + enddo + else + write(*,*) "TODO: This info2gl loop needs to be verified for correctness@TRAN.f90" + allocate(vec(dimen),stat = info) + ! + do ilevel = 1,Neigenlevels + ! + igamma = eigen(ilevel)%igamma + iunit = TReigenvec_unit(1,(/0/),igamma) + ! + do ideg = 1, eigen(ilevel)%ndeg + ! + iroot = iroot + 1 + ! + iroot = eigen(ilevel)%iroot(ideg) + ! + irec = eigen(ilevel)%irec(ideg) + ! + read(iunit, rec = irec) vec + ! + do i=1,dimen + call infog2l(i,iroot,desc_psi,nprow,npcol,myprow,mypcol,i_local,j_local,blacs_row,blacs_col) + if (myprow.eq.blacs_row.and.mypcol.eq.blacs_col) then + psi(i_local,j_local) = vec(i) + endif + enddo + ! + enddo + ! + enddo + endif ! endif ! @@ -1403,12 +1484,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='Eigen-vib. matrix elements of the rot. kinetic part' call IOStart(trim(job_is),chkptIO) ! - !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) - !write(chkptIO) 'Start Kinetic part' - call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) - call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) - mpioffset = 0 - call MPI_File_set_size(fileh_w, mpioffset, ierr) + !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) + !write(chkptIO) 'Start Kinetic part' + call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) + mpioffset = 0 + call MPI_File_set_size(fileh_w, mpioffset, ierr) if(mpi_rank .eq. 0) then call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) @@ -1434,8 +1515,14 @@ subroutine TRconvert_matel_j0_eigen(jrot) rootsize2= int(bset_contr(1)%Maxcontracts,hik) rootsize2 = rootsize2*rootsize2 ! - allocate(gmat(dimen,dimen),stat=info) - call ArrayStart('gmat-fields',info,1,kind(gmat),rootsize2) + if(blacs_size.eq.1) then + allocate(gmat(dimen,dimen),stat=info) + call ArrayStart('gmat-fields',info,1,kind(gmat),rootsize2) + else + call co_block_type_init(gmat, dimen, dimen, desc_gmat, info, gmat_block_type) + call ArrayStart('gmat-fields',info,1,kind(gmat),int(size(gmat),hik)) + endif + ! ! ! Preparing slicing ! @@ -1465,6 +1552,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !write(chkptIO) 'g_rot' if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) + call mpi_barrier(MPI_COMM_WORLD, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) ! !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) @@ -1484,6 +1573,14 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! islice = 0 ! + if (blacs_size.gt.1) then + call MPI_File_get_position(fileh, read_offset, ierr) + call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) + + call MPI_File_get_position(fileh_w, write_offset, ierr) + call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) + endif + ! do k1 = 1,3 ! do k2 = 1,3 @@ -1502,17 +1599,24 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - ! read(iunit) gmat - call MPI_File_read_all(fileh, gmat, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) + ! read(iunit) gmat + call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) !write (chkptIO) gmat ! endif ! ! - call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& - gmat,dimen,beta,mat_t,Neigenroots) - call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& - psi,dimen,beta,mat_s,Neigenroots) + if (blacs_size.gt.1) then + call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& + gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) + call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + else + call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& + gmat,dimen,beta,mat_t,Neigenroots) + call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& + psi,dimen,beta,mat_s,Neigenroots) + endif ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! @@ -1525,13 +1629,24 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! !write (chkptIO) mat_s - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) ! endif ! enddo ! enddo + ! + ! Reset view to flat file + if (blacs_size.gt.1) then + read_offset = read_offset + 9*dimen*dimen*mpi_real_size + call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) + + write_offset = write_offset + 9*Neigenroots*Neigenroots*mpi_real_size + call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + endif ! if (job%verbose>=5) call TimerStop('J0-convertion for g_rot') @@ -1547,6 +1662,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !write(chkptIO) 'g_cor' if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) + call MPI_Barrier(MPI_COMM_WORLD, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) ! endif ! @@ -1556,6 +1673,14 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! islice = 9 ! + if (blacs_size.gt.1) then + call MPI_File_get_position(fileh, read_offset, ierr) + call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) + + call MPI_File_get_position(fileh_w, write_offset, ierr) + call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) + endif + ! !do k1 = 1,FLNmodes ! !if (job%contrci_me_fast.and.k1>1) cycle @@ -1576,15 +1701,22 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - ! read(iunit) gmat - call MPI_File_read_all(fileh, gmat, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) + ! read(iunit) gmat + call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) ! endif ! - call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& - gmat,dimen,beta,mat_t,Neigenroots) - call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& - psi,dimen,beta,mat_s,Neigenroots) + if (blacs_size.gt.1) then + call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& + gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) + call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + else + call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& + gmat,dimen,beta,mat_t,Neigenroots) + call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& + psi,dimen,beta,mat_s,Neigenroots) + endif ! ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then @@ -1598,7 +1730,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! !write (chkptIO) mat_s - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) ! endif ! @@ -1606,6 +1738,17 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !enddo ! + ! Reset view to flat file + if (blacs_size.gt.1) then + read_offset = read_offset + 3*dimen*dimen*mpi_real_size + call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) + + write_offset = write_offset + 3*Neigenroots*Neigenroots*mpi_real_size + call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + endif + ! if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! !if (.not.job%IOmatelem_split.or.job%iswap(1)==1) write(chkptIO) 'End Kinetic part' @@ -1630,7 +1773,6 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! External field part ! - !!!!! MPIIO TODO !!!!! if (FLextF_matelem) then ! if (job%verbose>=3) write(out,"(/' Transform extF to J0-representation...')") @@ -1706,6 +1848,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !write(chkptIO) Neigenroots if(mpi_rank.eq.0) call MPI_File_write(fileh_w, Neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) + call mpi_barrier(mpi_comm_world, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) ! endif ! @@ -1716,8 +1860,21 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%verbose>=4) write(out,"(/'restore Extvib...: Number of elements: ',i0)") ncontr_t ! - allocate(extF_me(ncontr_t,ncontr_t),stat=info) - call ArrayStart('extF_me',info,1,kind(extF_me),rootsize2) + if(blacs_size.eq.1) then + allocate(extF_me(ncontr_t,ncontr_t),stat=info) + call ArrayStart('extF_me',info,1,kind(extF_me),rootsize2) + else + call co_block_type_init(extF_me, ncontr_t, ncontr_t, desc_extF, info, extF_block_type) + call ArrayStart('extF_me',info,1,kind(extF_me),int(size(extF_me),hik)) + endif + ! + if (blacs_size.gt.1) then + call MPI_File_get_position(fileh, read_offset, ierr) + call MPI_File_set_view(fileh, read_offset, mpi_byte, extF_block_type, "native", MPI_INFO_NULL, ierr) + + call MPI_File_get_position(fileh_w, write_offset, ierr) + call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) + endif ! do imu = fitting%iparam(1),fitting%iparam(2) ! @@ -1737,14 +1894,21 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_read_all(fileh, imu_t, 1, mpi_integer, mpi_status_ignore, ierr) ! !read(iunit) extF_me - call MPI_File_read_all(fileh, extF_me, dimen*dimen, mpi_double_precision, mpi_status_ignore, ierr) + call MPI_File_read_all(fileh, extF_me, size(extF_me), mpi_double_precision, mpi_status_ignore, ierr) ! endif ! - call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& - extF_me,dimen,beta,mat_t,Neigenroots) - call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& - psi,dimen,beta,mat_s,Neigenroots) + if(blacs_size.gt.1) then + call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& + extF_me,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) + call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + else + call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& + extF_me,dimen,beta,mat_t,Neigenroots) + call dgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,Neigenroots,& + psi,dimen,beta,mat_s,Neigenroots) + endif ! !mat_s = 0 ! @@ -1777,7 +1941,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) !write(chkptIO) imu !write(chkptIO) mat_s if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, mat_s, Neigenroots*Neigenroots, mpi_double_precision, mpi_status_ignore, ierr) + call MPI_Barrier(mpi_comm_world, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + call MPI_File_write(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) ! else ! @@ -1786,6 +1952,17 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! enddo + ! Reset view to flat file + if (blacs_size.gt.1) then + read_offset = read_offset + (fitting%iparam(2)-fitting%iparam(1)+1)*ncontr_t*ncontr_t*mpi_real_size & + + (fitting%iparam(2)-fitting%iparam(1)+1)*mpi_int_size + call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) + + write_offset = write_offset + 3*Neigenroots*Neigenroots*mpi_real_size + call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + endif ! if (allocated(extF_me)) deallocate(extF_me) call ArrayStop('extF_me') From 9870c3454150ad9c31ca871891aea0fd1e2f88f5 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 21 Jun 2019 16:16:29 +0100 Subject: [PATCH 16/79] [tran.f90] Fix x/y dimension confusion, plus minor cleanup --- mpi_aux.f90 | 2 -- tran.f90 | 6 ++---- trove.f90 | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 0c0275b..6fbd535 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -102,8 +102,6 @@ subroutine co_sum_double(x, root_process) real(rk), intent(inout), dimension(:,:) :: x integer, optional :: root_process - integer :: i - !integer, save :: result_image_mpi[*] if (comm_size.eq.1) return call TimerStart('co_sum_double') diff --git a/tran.f90 b/tran.f90 index 827ee61..dd639ae 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1243,7 +1243,6 @@ subroutine TRconvert_matel_j0_eigen(jrot) ' at least one must be set to CONVERT or EIGENfunc SAVE CONVERT' stop 'TRconvert_matel_j0_eigen: illegal MATELEM or EXTMATELEM <> CONVERT' end if - call co_init_comms()!TODO get rid of this one ! matsize = int(Neigenroots*(Neigenroots+1)/2,hik) matsize2 = int(Neigenroots*Neigenroots,hik) @@ -1275,10 +1274,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) call ArrayStart('psi',info,1,kind(psi),matsize) call ArrayStart('mat_t',info,1,kind(mat_t),matsize) else - call co_block_type_init(psi, Neigenroots, dimen, desc_psi, info) + call co_block_type_init(psi, dimen, Neigenroots, desc_psi, info) call ArrayStart('psi',info,1,kind(psi),int(size(psi),hik)) ! - call co_block_type_init(mat_t, dimen, Neigenroots, desc_mat_t, info) + call co_block_type_init(mat_t, Neigenroots, dimen, desc_mat_t, info) call ArrayStart('mat_t',info,1,kind(mat_t),int(size(mat_t),hik)) endif ! @@ -2482,7 +2481,6 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! nclasses = bset_contr(1)%nclasses ! - !call co_init_comms() select case (kinetic_part) ! case('rot','top') diff --git a/trove.f90 b/trove.f90 index b1e9b7d..6b89f25 100644 --- a/trove.f90 +++ b/trove.f90 @@ -180,12 +180,12 @@ subroutine ptmain ! ! Convert the J=0 basis set and mat.elements to the contracted represent. ! + call co_init_comms() if (action%convert_vibme) then call TRconvert_matel_j0_eigen(j) return endif ! - call co_init_comms() if (job%contrci_me_fast) then ! if (mpi_rank.eq.0) then From 6c4d732f8f8743152c55100ecaa6cf12d40aa6e2 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 1 Jul 2019 11:54:46 +0100 Subject: [PATCH 17/79] Implement SPLIT file i/o for MPI version --- mpi_aux.f90 | 33 +++++--- perturbation.f90 | 214 ++++++++++++++++++++++------------------------- tran.f90 | 172 +++++++++++++++++++++++++++++++++---- 3 files changed, 277 insertions(+), 142 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 6fbd535..117746d 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -75,17 +75,17 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) if (.not. comms_inited) stop "CO_BLOCK_TYPE_INIT COMMS NOT INITED" - MB = dimx/nprow - NB = dimy/npcol - MLOC = NUMROC( dimx, MB, myprow, 0, nprow ) - NLOC = NUMROC( dimy, NB, mypcol, 0, npcol ) - call DESCINIT(descr, dimx, dimy, MB, NB, 0, 0, blacs_ctxt, max(MLOC,1), ierr) + MB = dimy/nprow + NB = dimx/npcol + MLOC = NUMROC( dimy, MB, myprow, 0, nprow ) + NLOC = NUMROC( dimx, NB, mypcol, 0, npcol ) + call DESCINIT(descr, dimy, dimx, MB, NB, 0, 0, blacs_ctxt, max(MLOC,1), ierr) allocate(smat(MLOC,NLOC), stat=allocinfo) if(allocinfo) return if (present(mpi_type)) then - global_size = (/dimx, dimy/) + global_size = (/dimy, dimx/) distr = (/MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC/) dargs = (/MB, NB/) call MPI_Type_create_darray(blacs_size, blacs_rank, 2, global_size, distr, dargs, blacs_dims, & @@ -313,14 +313,18 @@ end subroutine co_distr_data subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) use accuracy - real(rk),dimension(:,lb:),intent(in) :: x + real(rk),dimension(:,lb:),intent(out) :: x integer,intent(in) :: longdim, lb, ub - type(MPI_File),intent(in) :: infile + type(MPI_File),intent(inout) :: infile type(MPI_Status) :: writestat integer(kind=MPI_Offset_kind) :: offset_start,offset_end integer :: readcount, ierr + call mpi_barrier(mpi_comm_world, ierr) + + call TimerStart('MPI_read_matrix') + if (comm_size.gt.1) then offset_start = (lb-1)*longdim*mpi_real_size offset_end = (longdim-ub)*longdim*mpi_real_size @@ -329,9 +333,11 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) call MPI_File_seek(infile, offset_end, MPI_SEEK_CUR) else - call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) + call MPI_File_read(infile,x,1,mpitype_column,writestat,ierr) endif + call TimerStop('MPI_read_matrix') + end subroutine co_read_matrix_distr subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) @@ -339,14 +345,14 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub - type(MPI_File),intent(in) :: outfile + type(MPI_File),intent(inout) :: outfile integer :: ierr integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat call mpi_barrier(mpi_comm_world, ierr) - call TimerStart('MPI_write') + call TimerStart('MPI_write_matrix') if (comm_size.gt.1) then offset_start = (lb-1)*longdim*mpi_real_size @@ -354,12 +360,13 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) + call mpi_barrier(mpi_comm_world, ierr) call MPI_File_seek(outfile, offset_end, MPI_SEEK_END) else - call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) + call MPI_File_write(outfile,x,1,mpitype_column,writestat,ierr) endif - call TimerStop('MPI_write') + call TimerStop('MPI_write_matrix') end subroutine co_write_matrix_distr diff --git a/perturbation.f90 b/perturbation.f90 index 237364d..ab32f19 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7943,7 +7943,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & ! Read the rotational part only ! !allocate(mat_(maxcontr,maxcontr),stat=ierr) - call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) + !call ArrayStart('PThamiltonian_contract: mat_',ierr,1,kind(mat_),rootsize2_) ! if (.not.job%IOmatelem_split) then ! @@ -7954,53 +7954,50 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! - endif - ! - allocate(grot(3,3),stat=ierr) - ! - !islice = 0 - ! - !do k1 = 1,3 - ! ! - ! do k2 = 1,3 - ! ! - ! islice = islice + 1 - ! ! - ! call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) - ! ! - ! allocate(grot(k1,k2)%me(maxcontr,maxcontr),stat=ierr) - ! call ArrayStart('grot-matrix',ierr,1,kind(f_t),rootsize2_) - ! ! - ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! ! - ! grot(k1,k2)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) - ! ! - ! call divided_slice_close_mpi(islice,fileh_slice,'g_rot') - ! ! - ! enddo - !enddo - ! - islice = 0 - ! - do k1 = 1,3 + allocate(grot(3,3),stat=ierr) ! - do k2 = 1,3 - ! - islice = islice + 1 - ! - call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) - ! - allocate(grot(k1,k2)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) - call ArrayStart('grot-matrix',ierr,1,kind(f_t),localrootsize) - ! - call co_read_matrix_distr(grot(k1,k2)%me, ncontr, co_startdim, co_enddim, fileh) + islice = 0 + ! + do k1 = 1,3 ! - call divided_slice_close_mpi(islice,fileh_slice,'g_rot') + do k2 = 1,3 + ! + islice = islice + 1 + ! + allocate(grot(k1,k2)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('grot-matrix',ierr,1,kind(f_t),localrootsize) + ! + call co_read_matrix_distr(grot(k1,k2)%me, ncontr, co_startdim, co_enddim, fileh) + ! + enddo + enddo + ! + else + ! + allocate(grot(3,3),stat=ierr) + ! + islice = 0 + ! + do k1 = 1,3 ! + do k2 = 1,3 + ! + islice = islice + 1 + ! + call divided_slice_open_mpi(islice,fileh_slice,'g_rot',job%matelem_suffix) + ! + allocate(grot(k1,k2)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('grot-matrix',ierr,1,kind(f_t),localrootsize) + ! + call co_read_matrix_distr(grot(k1,k2)%me, ncontr, co_startdim, co_enddim, fileh_slice) + ! + call divided_slice_close_mpi(islice,fileh_slice,'g_rot') + ! + enddo enddo - enddo + endif !deallocate(mat_) - call ArrayStop('PThamiltonian_contract: mat_') + !call ArrayStop('PThamiltonian_contract: mat_') ! if (job%verbose>=4) write(out,"(' ...done!')") ! @@ -8020,43 +8017,43 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! - endif - ! - allocate(gcor(3),stat=ierr) - ! - islice = 9 - ! - !do k1 = 1,3 - ! ! - ! islice = islice + 1 - ! ! - ! allocate(gcor(k1)%me(maxcontr,maxcontr),stat=ierr) - ! call ArrayStart('gcor-matrix',ierr,1,kind(f_t),rootsize2_) - ! ! - ! call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) - ! ! - ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! ! - ! gcor(k1)%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) - ! ! - ! call divided_slice_close_mpi(islice,fileh_slice,'g_cor') - ! ! - !enddo - ! - do k1 = 1,3 + allocate(gcor(3),stat=ierr) ! - islice = islice + 1 + islice = 9 ! - allocate(gcor(k1)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) - call ArrayStart('gcor-matrix',ierr,1,kind(f_t),localrootsize) + do k1 = 1,3 + ! + islice = islice + 1 + ! + allocate(gcor(k1)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('gcor-matrix',ierr,1,kind(f_t),localrootsize) + ! + call co_read_matrix_distr(gcor(k1)%me, ncontr, co_startdim, co_enddim, fileh) + ! + enddo ! - call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) + else ! - call co_read_matrix_distr(gcor(k1)%me, ncontr, co_startdim, co_enddim, fileh) + allocate(gcor(3),stat=ierr) ! - call divided_slice_close_mpi(islice,fileh_slice,'g_cor') + islice = 9 ! - enddo + do k1 = 1,3 + ! + islice = islice + 1 + ! + allocate(gcor(k1)%me(localmatrix_y,co_startdim:co_enddim),stat=ierr) + call ArrayStart('gcor-matrix',ierr,1,kind(f_t),localrootsize) + ! + call divided_slice_open_mpi(islice,fileh_slice,'g_cor',job%matelem_suffix) + ! + call co_read_matrix_distr(gcor(k1)%me, ncontr, co_startdim, co_enddim, fileh_slice) + ! + call divided_slice_close_mpi(islice,fileh_slice,'g_cor') + ! + enddo + ! + endif ! !deallocate(mat_) !call ArrayStop('PThamiltonian_contract: mat_') @@ -8079,13 +8076,6 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! - !do k1 = 1,3 - ! do k2 = 1,3 - ! ! - ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! ! - ! enddo - !enddo file_offset = 9*ncontr*ncontr*mpi_real_size call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! @@ -8096,13 +8086,6 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! - !!do k1 = 1,PT%Nmodes - !do k2 = 1,3 - ! ! - ! call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) - ! ! - !enddo - !!enddo file_offset = 3*ncontr*ncontr*mpi_real_size call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! @@ -8355,8 +8338,8 @@ subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) ! call MPI_File_read_all(fileh, readbuf, ilen, mpi_character, mpi_status_ignore, ierr) if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then - if (mpi_rank .eq. 0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) - stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' + if (mpi_rank .eq. 0) write (out,"(' [MPI] kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) + stop 'PTrestore_rot_kinetic_matrix_elements_mpi - in slice - header missing or wrong' end if end subroutine divided_slice_open_mpi @@ -8377,8 +8360,8 @@ subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) ! call MPI_File_read_all(fileh, readbuf, ilen, mpi_character, mpi_status_ignore, ierr) if ( trim(readbuf(1:ilen))/=trim(chkpt_type) ) then - if(mpi_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice: footer is missing or wrong',a)") readbuf(1:ilen) - stop 'divided_slice_close - in slice - footer missing or wrong' + if(mpi_rank .eq. 0) write (out,"(' divided_slice_close_mpi, kinetic checkpoint slice: footer is missing or wrong',a)") readbuf(1:ilen) + stop 'divided_slice_close_mpi - in slice - footer missing or wrong' end if ! call close_chkptfile_mpi(fileh) @@ -16150,6 +16133,7 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) ) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then + call mpi_barrier(mpi_comm_world, ierr) if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) @@ -16326,7 +16310,7 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%kinetmat_format).eq.'MPIIO') then if (mpi_rank.eq.0) then !AT if(.not.job%IOextF_divide) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) endif endif @@ -16456,16 +16440,16 @@ end subroutine write_divided_slice subroutine write_divided_slice_mpi(islice,name,suffix,N,field) ! - integer(ik),intent(in) :: islice - character(len=*),intent(in) :: name,suffix - integer(ik),intent(in) :: N - real(rk),intent(in) :: field(N,N) - character(len=4) :: jchar - character(len=cl) :: filename - character(len=cl) :: job_is - type(MPI_File) :: chkptMPIIO - integer(kind=MPI_OFFSET_KIND) :: offset - integer :: ierr + integer(ik),intent(in) :: islice + character(len=*),intent(in) :: name,suffix + integer(ik),intent(in) :: N + real(rk),dimension(:,:),intent(in) :: field + character(len=4) :: jchar + character(len=cl) :: filename + character(len=cl) :: job_is + type(MPI_File) :: chkptMPIIO + integer(kind=MPI_OFFSET_KIND) :: offset + integer :: ierr ! write(job_is,"('single swap_matrix')") ! @@ -16475,14 +16459,15 @@ subroutine write_divided_slice_mpi(islice,name,suffix,N,field) ! filename = trim(suffix)//trim(adjustl(jchar))//'.chk' ! + offset = 0 call MPI_File_open(mpi_comm_world, filename, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + call MPI_File_set_size(chkptMPIIO, offset, ierr) ! - if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,trim(name),len(trim(name)),mpi_character,mpi_status_ignore,ierr) + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) ! - call co_write_matrix_distr(field,mdimen, startdim, enddim,chkptMPIIO) + call co_write_matrix_distr(field, N, co_startdim, co_enddim,chkptMPIIO) ! - call MPI_File_seek(chkptMPIIO, offset, MPI_SEEK_END) - if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,trim(name),len(trim(name)),mpi_character,mpi_status_ignore,ierr) + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) ! call MPI_File_close(chkptMPIIO, ierr) ! @@ -29886,6 +29871,7 @@ end subroutine PTanalysis_density ! all necessary information ! subroutine check_point_active_space(action) + use mpi_aux character(len=*), intent(in) :: action ! 'SAVE' or 'READ' ! @@ -29895,11 +29881,11 @@ subroutine check_point_active_space(action) write (out,"(' check_point_active_space - action ',a,' is not valid')") trim(action) stop 'check_point_active_space - bogus command' case ('SAVE','save') - call checkpointSave + if(mpi_rank.eq.0) call checkpointSave case ('APPEND') - call checkpointAppend + if(mpi_rank.eq.0) call checkpointAppend case ('CLOSE','close') - call checkpointClose + if(mpi_rank.eq.0) call checkpointClose case ('READ','read') call checkpointRestore end select @@ -30301,6 +30287,7 @@ end subroutine check_point_active_space ! read-write the contracted basis sets ! subroutine PTcheck_point_dvr(action) + use mpi_aux character(len=*), intent(in) :: action ! 'SAVE' or 'READ' ! @@ -30310,7 +30297,7 @@ subroutine PTcheck_point_dvr(action) write (out,"(' PTcheck_point_dvr - action ',a,' is not valid')") trim(action) stop 'PTcheck_point_dvr - bogus command' case ('SAVE','save') - call checkpointSave + if(mpi_rank.eq.0) call checkpointSave case ('READ','read') call checkpointRestore case ('NONE','none') @@ -30595,6 +30582,7 @@ end subroutine PTcheck_point_dvr ! read-write the contracted basis sets ! subroutine PTcheck_point_contracted_space(action) + use mpi_aux ! implicit none ! @@ -30606,9 +30594,9 @@ subroutine PTcheck_point_contracted_space(action) write (out,"(' PTcheck_point_contracted_space - action ',a,' is not valid')") trim(action) stop 'PTcheck_point_contracted_space - bogus command' case ('SAVE','save') - call checkpointSave + if(mpi_rank.eq.0) call checkpointSave case ('CLOSE','close') - call checkpointClose + if(mpi_rank.eq.0) call checkpointClose case ('READ','read') call checkpointRestore end select diff --git a/tran.f90 b/tran.f90 index dd639ae..922eedb 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1274,10 +1274,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) call ArrayStart('psi',info,1,kind(psi),matsize) call ArrayStart('mat_t',info,1,kind(mat_t),matsize) else - call co_block_type_init(psi, dimen, Neigenroots, desc_psi, info) + call co_block_type_init(psi, Neigenroots, dimen, desc_psi, info) call ArrayStart('psi',info,1,kind(psi),int(size(psi),hik)) ! - call co_block_type_init(mat_t, Neigenroots, dimen, desc_mat_t, info) + call co_block_type_init(mat_t, dimen, Neigenroots, desc_mat_t, info) call ArrayStart('mat_t',info,1,kind(mat_t),int(size(mat_t),hik)) endif ! @@ -1572,7 +1572,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! islice = 0 ! - if (blacs_size.gt.1) then + if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) @@ -1590,7 +1590,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - call divided_slice_read(islice,'g_rot',job%matelem_suffix,dimen,gmat,ierror) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_read_mpi(islice,'g_rot',job%matelem_suffix,dimen,gmat,gmat_block_type,ierror) + else + call divided_slice_read(islice,'g_rot',job%matelem_suffix,dimen,gmat,ierror) + endif ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! @@ -1619,7 +1623,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - if(mpi_rank.eq.0) call divided_slice_write(islice,'g_rot',job%j0matelem_suffix,Neigenroots,mat_s) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_write_mpi(islice,'g_rot',job%j0matelem_suffix,Neigenroots,mat_s,mat_s_block_type) + else + if(mpi_rank.eq.0) call divided_slice_write(islice,'g_rot',job%j0matelem_suffix,Neigenroots,mat_s) + endif ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! @@ -1637,7 +1645,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) enddo ! ! Reset view to flat file - if (blacs_size.gt.1) then + if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then read_offset = read_offset + 9*dimen*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) @@ -1672,7 +1680,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! islice = 9 ! - if (blacs_size.gt.1) then + if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) @@ -1692,7 +1700,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - call divided_slice_read(islice,'g_cor',job%matelem_suffix,dimen,gmat,ierror) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_read_mpi(islice,'g_cor',job%matelem_suffix,dimen,gmat,gmat_block_type,ierror) + else + call divided_slice_read(islice,'g_cor',job%matelem_suffix,dimen,gmat,ierror) + endif ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! @@ -1720,7 +1732,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOmatelem_split.and..not.job%vib_rot_contr) then ! - if(mpi_rank.eq.0) call divided_slice_write(islice,'g_cor',job%j0matelem_suffix,Neigenroots,mat_s) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_write_mpi(islice,'g_cor',job%j0matelem_suffix,Neigenroots,mat_s,mat_s_block_type) + else + if(mpi_rank.eq.0) call divided_slice_write(islice,'g_cor',job%j0matelem_suffix,Neigenroots,mat_s) + endif ! elseif (job%IOmatelem_split.and.job%vib_rot_contr) then ! @@ -1738,7 +1754,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) !enddo ! ! Reset view to flat file - if (blacs_size.gt.1) then + if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then read_offset = read_offset + 3*dimen*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) @@ -1867,7 +1883,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call ArrayStart('extF_me',info,1,kind(extF_me),int(size(extF_me),hik)) endif ! - if (blacs_size.gt.1) then + if ((.not.job%IOextF_divide) .and. blacs_size.gt.1) then call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, extF_block_type, "native", MPI_INFO_NULL, ierr) @@ -1881,7 +1897,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (job%IOextF_divide) then ! - call divided_slice_read(imu,'extF',job%extmat_suffix,dimen,extF_me,ierror) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_read_mpi(imu,'extF',job%extmat_suffix,dimen,extF_me,extF_block_type,ierror) + else + call divided_slice_read(imu,'extF',job%extmat_suffix,dimen,extF_me,ierror) + endif ! if (ierror==1) cycle ! @@ -1946,13 +1966,17 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - if(mpi_rank.eq.0) call divided_slice_write(imu,'extF',job%j0extmat_suffix,Neigenroots,mat_s) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call divided_slice_write_mpi(imu,'extF',job%j0extmat_suffix,Neigenroots,mat_s,mat_s_block_type) + else + if(mpi_rank.eq.0) call divided_slice_write(imu,'extF',job%j0extmat_suffix,Neigenroots,mat_s) + endif ! endif ! enddo ! Reset view to flat file - if (blacs_size.gt.1) then + if ((.not.job%IOextF_divide) .and. blacs_size.gt.1) then read_offset = read_offset + (fitting%iparam(2)-fitting%iparam(1)+1)*ncontr_t*ncontr_t*mpi_real_size & + (fitting%iparam(2)-fitting%iparam(1)+1)*mpi_int_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) @@ -2049,6 +2073,50 @@ subroutine divided_slice_write(islice,name,suffix,N,field) close(chkptIO) ! end subroutine divided_slice_write + ! + ! + subroutine divided_slice_write_mpi(islice,name,suffix,N,field,block_type) + ! + implicit none + ! + integer(ik),intent(in) :: islice + character(len=*),intent(in) :: name,suffix + integer(ik),intent(in) :: N + real(rk),dimension(:,:),intent(in) :: field + type(MPI_Datatype),intent(in) :: block_type + character(len=4) :: jchar + character(len=cl) :: filename + type(MPI_File) :: chkptMPIIO + integer(kind=MPI_OFFSET_KIND) :: offset + integer :: ierr + ! + write(job_is,"('single swap_matrix MPI')") + ! + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' + ! + offset = 0 + call MPI_File_open(mpi_comm_world, filename, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + call MPI_File_set_size(chkptMPIIO, offset, ierr) + ! + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) + ! + !call MPI_File_get_position(chkptMPIIO, offset, ierr) + offset = int(len(trim(name)),MPI_OFFSET_KIND) + call MPI_File_set_view(chkptMPIIO, offset, mpi_byte, block_type, "native", MPI_INFO_NULL, ierr) + ! + call MPI_File_write_all(chkptMPIIO, field, size(field), mpi_double_precision, mpi_status_ignore, ierr) + ! + offset = offset + N*N*mpi_real_size + call MPI_File_set_view(chkptMPIIO, offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + ! + if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) + ! + call MPI_File_close(chkptMPIIO, ierr) + ! + end subroutine divided_slice_write_mpi ! ! subroutine divided_slice_read(islice,name,suffix,N,field,ierror) @@ -2113,6 +2181,78 @@ subroutine divided_slice_read(islice,name,suffix,N,field,ierror) ! end subroutine divided_slice_read ! + ! + subroutine divided_slice_read_mpi(islice,name,suffix,N,field,block_type,ierr) + ! + implicit none + ! + integer(ik),intent(in) :: islice + character(len=*),intent(in) :: name,suffix + integer(ik),intent(in) :: N + real(rk),dimension(:,:),intent(out) :: field + type(MPI_Datatype),intent(in) :: block_type + integer(ik),intent(out) :: ierr + character(len=4) :: jchar + type(MPI_File) :: chkptMPIIO + character(len=cl) :: buf,filename,job_is + integer(ik) :: ilen + integer(MPI_OFFSET_KIND) :: offset + ! + ierror = 0 + ! + write(job_is,"('single swap_matrix MPI')") + ! + write(jchar, '(i4)') islice + ! + filename = trim(suffix)//trim(adjustl(jchar))//'.chk' + ilen = LEN_TRIM(name) + ! + call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, chkptMPIIO, ierr) + ! + if (ierr .ne. 0) then + ! + ! This error code will allow simply skipping the corresponding record/file without crashing the program + ! + ierr = 1 + ! + ! we allow to skip opening the file only for the external matrix elements + ! + if (trim(name)/="extF") then + write (out,"(' kinetic divided_slice_read_mpi in slice ',a20,': file does not exist')") filename + stop 'divided_slice_read_mpi - in slice - file does not exist' + endif + ! + if (job%verbose>=4) write (out,"(' (skipped).')",advance='YES') + ! + return + endif + ! + call MPI_File_read_all(chkptMPIIO,buf,ilen,mpi_character,mpi_status_ignore,ierr) + ! + if ( trim(buf(1:ilen))/=trim(name) ) then + write (out,"(' kinetic divided_slice_read_mpi in slice ',a20,': header is missing or wrong: ',a)") filename,buf(1:ilen) + stop 'divided_slice_read_mpi - in slice - header missing or wrong' + end if + ! + offset = ilen + call MPI_File_set_view(chkptMPIIO, int(ilen,MPI_OFFSET_KIND), mpi_byte, block_type, "native", MPI_INFO_NULL, ierr) + call MPI_File_read_all(chkptMPIIO, field, size(field), mpi_double_precision, mpi_status_ignore, ierr) + ! + offset = offset + N*N*mpi_real_size + call MPI_File_set_view(chkptMPIIO, offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) + ! + call MPI_File_read_all(chkptMPIIO,buf,ilen,mpi_character,mpi_status_ignore,ierr) + ! + if ( trim(buf(1:ilen))/=trim(name) ) then + write (out,"(' kinetic divided_slice_read_mpi in slice ',a20,': header is missing or wrong',a)") filename,buf(1:ilen) + stop 'divided_slice_read_mpi - in slice - header missing or wrong' + end if + ! + call MPI_File_close(chkptMPIIO, ierr) + ! + end subroutine divided_slice_read_mpi + ! + ! subroutine divided_slice_read_vibrot(islice,suffix,N,field) ! implicit none @@ -2542,7 +2682,7 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ !read(chkptIO) buf18(1:11) call MPI_File_read_all(fileh, buf18, 11, mpi_character, mpi_status_ignore, ierr) if (buf18(1:11)/='icontr_ideg') then - write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,buf18(1:11) + write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") trim(job%kinetmat_file),buf18(1:11) stop 'restore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if ! @@ -2560,7 +2700,7 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ !read(chkptIO) buf18(1:4) call MPI_File_read_all(fileh, buf18, 5, mpi_character, mpi_status_ignore, ierr) if (buf18(1:5)/='g_rot') then - write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") 'mpiiofile',buf18(1:5) + write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") trim(job%kinetmat_file),buf18(1:5) ! if (buf18(1:4)=='hvib'.or.buf18(1:3)=='End') & write (out,"(a,a)") & From ab2017459f9022d8365612c257e7fe490a4ed042 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Fri, 12 Jul 2019 10:50:46 +0100 Subject: [PATCH 18/79] Fix MPI/POSIX IO switch, fix calculation of lower matrix in perturbation.f90, clean output --- accuracy.f90 | 4 +- fields.f90 | 4 +- makefile | 6 +- mpi_aux.f90 | 19 +- perturbation.f90 | 226 ++++++++++++++---------- timer.f90 | 4 +- tran.f90 | 449 ++++++++++++++++++++++++++++------------------- trove.f90 | 5 +- 8 files changed, 422 insertions(+), 295 deletions(-) diff --git a/accuracy.f90 b/accuracy.f90 index 9696582..f546076 100644 --- a/accuracy.f90 +++ b/accuracy.f90 @@ -15,8 +15,8 @@ module accuracy integer, parameter :: drk = selected_real_kind(12,25) ! "Double" reals and complex (complexi? :-) integer, parameter :: rk = selected_real_kind(12,25) ! "Normal" reals and complex (complexi? :-) integer, parameter :: ark = selected_real_kind(25,32) ! "Accurate" reals and complex (complexi? :-) - integer, parameter :: inp = 5 ! Output I/O channel - integer, parameter :: out = 6 ! Output I/O channel + integer, parameter :: inp = 5 ! Input I/O channel + integer :: out = 6 ! Output I/O channel integer, parameter :: nfilelegendre = 101 ! Dump-outout channel for eigenfunction ! universal constants diff --git a/fields.f90 b/fields.f90 index 33576ff..bc0cd0a 100644 --- a/fields.f90 +++ b/fields.f90 @@ -15477,7 +15477,7 @@ subroutine fingerprintWrite write(chkptIO,"(i8,' <- Jrot, rotational angular momentum')") bset%dscr(0)%range(1) ! do imode = 0,trove%Nmodes - write(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,l,1x,i2,1x,a10,i9,l,l,l)") & + write(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i2,1x,i2,1x,a10,i9,i3,i3,i3)") & imode, bset%dscr(imode) enddo ! @@ -15577,7 +15577,7 @@ subroutine fingerprintRead ! !read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,3i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i,i)") imode_,bs_ ! - read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,l,1x,i2,1x,a10,i9,i2,i2)") & + read(chkptIO,"(6x,i4,1x,3(a10,1x),i5,3x,a2,3x,i2,5x,i2,1x,2i4,2x,f6.1,2x,i9,1x,2f9.3,1x,i2,1x,i2,1x,a10,i9,i2,i2)") & imode_,bs_%type,bs_%COORD_KINET,bs_%COORD_POTEN,bs_%MODEL,bs_%DIM,bs_%SPECIES,bs_%CLASS,bs_%RANGE,& bs_%RES_COEFFS,bs_%NPOINTS,bs_%BORDERS,bs_%PERIODIC,bs_%IPERIOD ! diff --git a/makefile b/makefile index cc25a32..957aebd 100644 --- a/makefile +++ b/makefile @@ -10,16 +10,18 @@ checkin: pot_user = pot_ch4 -PLAT = _2205_i17 +PLAT = +#PLAT = _2205_i17 #FOR = ifort FOR = mpif90 -FFLAGS = -qopenmp -xHost -O3 -ip +FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback #FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a #LAPACK = -L/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-5.4.0/openblas-0.2.20-gbzlk5wei7fsojje2fiwj7w5wssikb73/lib -lopenblas +#LAPACK = -mkl=parallel -lscalapack LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 #LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 117746d..98b6708 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -11,7 +11,7 @@ module mpi_aux public co_startdim, co_enddim public blacs_size, blacs_rank, blacs_ctxt - public nprow,npcol,myprow,mypcol, desca,descb,descc + public nprow,npcol,myprow,mypcol public mpi_real_size, mpi_int_size interface co_sum @@ -29,10 +29,6 @@ module mpi_aux !blacs/pblas integer :: blacs_size, blacs_rank, blacs_ctxt integer :: nprow,npcol,myprow,mypcol - integer :: desca(9) - integer :: descb(9) - integer :: descc(9) - integer :: descd(9) integer,dimension(2) :: blacs_dims contains @@ -69,7 +65,7 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) type(MPI_Datatype),intent(out),optional :: mpi_type integer,dimension(2) :: global_size, distr, dargs - integer :: MB,NB,MLOC,NLOC,ierr + integer :: MB,NB,MLOC,NLOC,ierr integer,external :: NUMROC @@ -133,6 +129,10 @@ subroutine co_init_comms() call MPI_Type_size(mpi_double_precision, mpi_real_size,ierr) call MPI_Type_size(mpi_integer, mpi_int_size,ierr) + if (mpi_rank.ne.0) then + open(newunit=out, file='/dev/null', status='replace', iostat=ierr, action="write") + endif + comms_inited = .true. call co_init_blacs() @@ -326,8 +326,8 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) call TimerStart('MPI_read_matrix') if (comm_size.gt.1) then - offset_start = (lb-1)*longdim*mpi_real_size - offset_end = (longdim-ub)*longdim*mpi_real_size + offset_start = (lb-1)*int(longdim, MPI_OFFSET_KIND)*mpi_real_size + offset_end = (longdim-ub)*int(longdim, MPI_OFFSET_KIND)*mpi_real_size call MPI_File_seek(infile, offset_start, MPI_SEEK_CUR) call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) @@ -355,8 +355,9 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call TimerStart('MPI_write_matrix') if (comm_size.gt.1) then - offset_start = (lb-1)*longdim*mpi_real_size + offset_start = (lb-1)*int(longdim,MPI_OFFSET_KIND)*mpi_real_size offset_end = 0 + call mpi_barrier(mpi_comm_world, ierr) call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) diff --git a/perturbation.f90 b/perturbation.f90 index ab32f19..86e79ff 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6851,9 +6851,12 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Calculating the Hamiltonian matrix') ! task = 'top' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! ! We have two calculation options: fast and cheap and slow but expensive. ! @@ -6870,15 +6873,20 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Restoring KE matrix') ! task = 'rot' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif + ! task = 'cor' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! call TimerStop('Restoring KE matrix') ! @@ -6893,10 +6901,12 @@ subroutine PThamiltonian_contract(jrot) call TimerStart('Restoring KE matrix') ! task = 'vib' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif call TimerStop('Restoring KE matrix') ! @@ -6978,10 +6988,12 @@ subroutine PThamiltonian_contract(jrot) if (job%verbose>=4) write(out,"(/' Construct the Hamiltonian matrix...')") ! task = 'top-icontr' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! if (job%verbose>=5) write(out,"(' N Arrays of ',f12.5,'Gb each will be allocated (N is the number of processors)')") & real(sym%Nrepresen,rk)*real(PT%max_deg_size,rk)*real(max_dim,rk)*real(rk,rk)/1024.0_rk**3 @@ -7001,25 +7013,32 @@ subroutine PThamiltonian_contract(jrot) if (FLrotation.and.jrot/=0) then ! task = 'rot-icontr' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + endif + ! task = 'cor-icontr' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + endif ! endif ! if ( PTvibrational_me_calc ) then ! task = 'vib-icontr' - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr,icontr) + endif ! endif ! @@ -7098,10 +7117,12 @@ subroutine PThamiltonian_contract(jrot) ! task = 'rot' ! - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7167,10 +7188,12 @@ subroutine PThamiltonian_contract(jrot) ! task = 'cor' ! - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7242,10 +7265,12 @@ subroutine PThamiltonian_contract(jrot) ! task = 'vib' ! - !call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) - call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& - !PT, PTvibrational_me_calc,grot,gcor,hvib, & - ncontr,maxcontr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call PTrestore_rot_kinetic_matrix_elements_mpi(jrot,task,mpiiofile,dimen,& + ncontr,maxcontr) + else + call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) + endif ! !$omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) @@ -7848,12 +7873,9 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if - !call mpi_file_get_size(fileh,file_offset,ierr) - file_offset = (PT%Nclasses+1)*ncontr*mpi_int_size + file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) - !call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) - call MPI_File_read_all(fileh, readbuf, 11, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:11)/='icontr_ideg') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) @@ -7861,8 +7883,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if - !call MPI_File_read_all(fileh, imat_t, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) - file_offset = (PT%Nclasses+1)*ncontr*mpi_int_size + file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) !deallocate(imat_t) @@ -8076,7 +8097,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! - file_offset = 9*ncontr*ncontr*mpi_real_size + file_offset = 9*int(ncontr,MPI_OFFSET_KIND)*ncontr*mpi_real_size call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! call MPI_File_read_all(fileh, readbuf, 5, mpi_character, mpi_status_ignore, ierr) @@ -8086,7 +8107,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! - file_offset = 3*ncontr*ncontr*mpi_real_size + file_offset = 3*int(ncontr,MPI_OFFSET_KIND)*ncontr*mpi_real_size call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) ! !deallocate(mat_t) @@ -9131,14 +9152,14 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! if(abs(mat_elem)>1.0_rk) then !write(out,"(/'Non-diagonal element between different symmetries:')") - write(out,"(/'<',a4,2i6,'|H|',a4,2i6,'> = ',g18.10,a)") & + write(0,"(/'<',a4,2i6,'|H|',a4,2i6,'> = ',g18.10,a)") & sym%label(isym),irow,iterm+ielem,sym%label(jsym),jrow,jterm+jelem,mat_elem,& ' Non-diagonal element between different symmetries is too large!' ! ! ! special case for linear molecules and E-symmetries. Not an ideal solution! if (trove%lincoord==0.or.all( (/isym,jsym/)<=4 ) ) then - stop 'non-zero element between two symmetries' + stop 'non-zero element between two symmetries - symm_mat_element_vector_k' endif endif endif @@ -9283,7 +9304,7 @@ recursive subroutine symm_mat_element_vector(jrot,irow,ijterm,func,mat_t) write(out,"(/'<',a4,3i6,'|H|',a4,3i6,'> = ',g18.10,a)") & sym%label(isym),irow,ielem,iterm+ielem,sym%label(jsym),jrow,jelem,jterm+jelem,mat_elem,& ' Non-diagonal element (euler) between different symmetries is too large!' - stop 'non-zero element between two symmetries' + stop 'non-zero element between two symmetries - symm_mat_element_vector' endif ! endif @@ -9529,7 +9550,7 @@ recursive subroutine transfer_to_symmetric_representatoin(irow,jrow,ijterm,hcont ! ! special case for linear molecules and E-symmetries. Not an ideal solution! if (trove%lincoord==0.or.all( (/isym,jsym/)<=4 ) ) then - stop 'non-zero element between two symmetries' + stop 'non-zero element between two symmetries - transfer_to_symmetric_representatoin' endif endif ! @@ -15499,7 +15520,7 @@ subroutine PTcontracted_matelem_class(jrot) ! ! The vibrational part of the Hamiltonian ! - if (job%verbose>=4) write(out,"(' allocating hvib, ',i9,' elements...')") rootsize + if (job%verbose>=4) write(out,"(' allocating hvib, ',i12,' elements...')") rootsize ! allocate(hvib%me(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) call ArrayStart('gvib-grot-gcor-fields',alloc,1,kind(f_t),rootsize) @@ -15517,7 +15538,7 @@ subroutine PTcontracted_matelem_class(jrot) ! if (job%verbose>=2) write(out,"(/'Rotational part of the Kinetic energy operator...')") ! - if (job%verbose>=4) write(out,"(' allocating grot, ',i9,' elements...')") rootsize + if (job%verbose>=4) write(out,"(' allocating grot, ',i12,' elements...')") rootsize ! if (job%IOmatelem_split) then ! @@ -15584,7 +15605,6 @@ subroutine PTcontracted_matelem_class(jrot) islice = 0 job_is = 'grot' ! - ! create column datatype for MPI-IO ! TODO clean up do k1 = 1,3 do k2 = 1,3 @@ -15654,7 +15674,6 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) endif else @@ -15710,7 +15729,9 @@ subroutine PTcontracted_matelem_class(jrot) enddo ! enddo + ! call co_distr_data(grot_t, recvbuf, mdimen_p, startdim, enddim) + ! do icoeff=startdim,enddim do jcoeff=1,icoeff-1 grot_t(jcoeff,icoeff) = -1*grot_t(jcoeff,icoeff) @@ -15775,7 +15796,7 @@ subroutine PTcontracted_matelem_class(jrot) ! ! The vibrational part of the Hamiltonian ! - !if (job%verbose>=4) write(out,"(' allocating hvib, ',i9,' elements...')") rootsize + !if (job%verbose>=4) write(out,"(' allocating hvib, ',i12,' elements...')") rootsize ! !allocate(hvib%me(rootsize),stat=alloc) !call ArrayStart('gvib-grot-gcor-fields',alloc,1,kind(f_t),rootsize) @@ -15820,36 +15841,41 @@ subroutine PTcontracted_matelem_class(jrot) ! if (job%verbose>=2) write(out,"('...end!')") ! - !POSIXIO!if (treat_rotation.and.trim(job%IOkinet_action)=='SAVE') then - !POSIXIO! ! - !POSIXIO! ! store the rotational matrix elements - !POSIXIO! ! - !POSIXIO! write(chkptIO) 'g_rot' - !POSIXIO! ! - !POSIXIO! do k1 = 1,3 - !POSIXIO! do k2 = 1,3 - !POSIXIO! ! - !POSIXIO! write(chkptIO) grot_(k1,k2,:,:) - !POSIXIO! ! - !POSIXIO! enddo - !POSIXIO! enddo - !POSIXIO! ! - !POSIXIO! write(chkptIO) 'g_cor' - !POSIXIO! ! - !POSIXIO! ! store the Coriolis matrix elements - !POSIXIO! ! - !POSIXIO! do k1 = 1,PT%Nmodes - !POSIXIO! do k2 = 1,3 - !POSIXIO! ! - !POSIXIO! write(chkptIO) gcor_(k1,k2,:,:) - !POSIXIO! ! - !POSIXIO! enddo - !POSIXIO! enddo - !POSIXIO! ! - !POSIXIO! deallocate(grot_,gcor_) - !POSIXIO! call ArrayStop('grot-gcor-fields') - !POSIXIO! ! - !POSIXIO!endif + if (treat_rotation.and.trim(job%IOkinet_action)=='SAVE') then + if (trim(job%kinetmat_format).eq.'MPIIO') then + write(out,*) "TODO implement MPI-IO version !POSIXIO!" + stop "Not yet implemented" + else + ! + ! store the rotational matrix elements + ! + write(chkptIO) 'g_rot' + ! + do k1 = 1,3 + do k2 = 1,3 + ! + write(chkptIO) grot_(k1,k2,:,:) + ! + enddo + enddo + ! + write(chkptIO) 'g_cor' + ! + ! store the Coriolis matrix elements + ! + do k1 = 1,PT%Nmodes + do k2 = 1,3 + ! + write(chkptIO) gcor_(k1,k2,:,:) + ! + enddo + enddo + ! + deallocate(grot_,gcor_) + call ArrayStop('grot-gcor-fields') + ! + endif + endif ! else ! if (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) then ! @@ -16720,10 +16746,15 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) !ib0 = icoefficoeff1(icoeff) ! do jcoeff=(b-1)*mdimen_p+1,b*mdimen_p - if (jcoeff .gt. PT%Maxcontracts) cycle + if (jcoeff .gt. PT%Maxcontracts) cycle ! - iroot = icoeff2iroot(1,icoeff) - jroot = icoeff2iroot(1,jcoeff) + if (jcoeff.le.icoeff) then + iroot = icoeff2iroot(1,icoeff) + jroot = icoeff2iroot(1,jcoeff) + else + iroot = icoeff2iroot(1,jcoeff) + jroot = icoeff2iroot(1,icoeff) + endif ! f_t = matclass(1,iroot,jroot) ! @@ -16731,8 +16762,13 @@ subroutine calc_contract_matrix_elements_II(iterm,k1,k2,fl,field,func) ! do iclasses = 2,Nclasses ! - iroot = icoeff2iroot(iclasses,icoeff) - jroot = icoeff2iroot(iclasses,jcoeff) + if (jcoeff.le.icoeff) then + iroot = icoeff2iroot(iclasses,icoeff) + jroot = icoeff2iroot(iclasses,jcoeff) + else + iroot = icoeff2iroot(iclasses,jcoeff) + jroot = icoeff2iroot(iclasses,icoeff) + endif ! !f_prod(iclasses) = mat_tt(iclasses)%coeffs(iroot,jroot) ! diff --git a/timer.f90 b/timer.f90 index a6031a0..90943fa 100644 --- a/timer.f90 +++ b/timer.f90 @@ -548,8 +548,8 @@ subroutine ArrayStart(name,alloc,isize,ikind,hsize) size_ = (ikind*real(hsize_))/real(1024**3) ! size in GByte if (alloc/=0) then - write(out,"(/' Error ',i8,' trying to allocate array ',a)") alloc,name - write(out,"( ' Array dimension = ',i8,' array size = ',f20.2,' Gb')") hsize_,size_ + write(0,"(/' Error ',i8,' trying to allocate array ',a)") alloc,name + write(0,"( ' Array dimension = ',i8,' array size = ',f20.2,' Gb')") hsize_,size_ call MemoryReport stop 'ArrayStart - allocation error' end if diff --git a/tran.f90 b/tran.f90 index 922eedb..b76b4c0 100644 --- a/tran.f90 +++ b/tran.f90 @@ -50,7 +50,8 @@ module tran ! type(bset_contrT), allocatable,save :: bset_contr(:) ! information on the contracted basis set ! note: bset_contr(1) is always reserved for J=0 - integer(ik), allocatable :: i2d_to_1d(:,:) ! a 2d upper diagonal matrix index is transformed into a 1d matrix + !AT - seems unused + !integer(ik), allocatable :: i2d_to_1d(:,:) ! a 2d upper diagonal matrix index is transformed into a 1d matrix type(PTeigenT), allocatable :: eigen(:) ! Eigensolution: description of eigenvectors ! used in the intensity calculations. @@ -312,9 +313,6 @@ subroutine index_correlation(njval, jval) stop 'index_correlation: Nclasses cannot be different for diff. J' endif ! - allocate(cnu_i(1:nclasses),cnu_j(1:nclasses),stat = info) - if (info /= 0) stop 'index_correlation: cnu_i allocation error' - ! if (tran_debug > 2) then write(out, '(/a, 1x, i2)') 'find correlation between contraction indexes for J = 0 and J =', jval(jind) end if @@ -329,6 +327,13 @@ subroutine index_correlation(njval, jval) 'J =', jval(jind), 'J = 0', 'icase', 'ilambda', 'iroot', 'icase', 'ilambda', 'iroot' end if ! + !$omp parallel default(shared) & + !$omp& private(cnu_i, cnu_j, nclasses, info, icase, jcase, ilambda, jlambda, found, my_fmt) & + !$omp& private(iroot,jcontr,ilevel,ideg,k,tau) + allocate(cnu_i(1:nclasses),cnu_j(1:nclasses),stat = info) + if (info /= 0) stop 'index_correlation: cnu_i allocation error' + ! + !$omp do schedule(dynamic) l_icase : do icase = 1, bset_contr(jind)%Maxsymcoeffs ! cnu_i(1:nclasses) = bset_contr(jind)%contractive_space(1:nclasses, icase) @@ -377,6 +382,7 @@ subroutine index_correlation(njval, jval) end do l_ilambda ! end do l_icase + !$omp end do ! ! The same but for a 1d index iroot in place of (icase,lambda) ! @@ -387,6 +393,8 @@ subroutine index_correlation(njval, jval) allocate(bset_contr(jind)%k(bset_contr(jind)%Maxcontracts), stat = info) call ArrayStart('bset_contr',info,size(bset_contr(jind)%k),kind(bset_contr(jind)%k)) ! + ! $omp parallel do default(shared) private(iroot,icase,ilambda,jcontr,ilevel,ideg,k,tau) + !$omp do schedule(dynamic) do iroot = 1, bset_contr(jind)%Maxcontracts ! icase = bset_contr(jind)%icontr2icase(iroot, 1) @@ -415,33 +423,36 @@ subroutine index_correlation(njval, jval) end if ! enddo + !$omp end do + ! + deallocate(cnu_i,cnu_j) + !$omp end parallel ! if (tran_debug > 2) then write(out, '(/a)') 'done' end if ! - deallocate(cnu_i,cnu_j) - ! end do - ! - ! Introduce the address-index matrix for correspondence between 1d and 2d (i=2) write(out,"('...done!')") ! @@ -1136,7 +1147,7 @@ subroutine TRconvert_repres_J0_to_contr(Jrot) integer(ik) :: iroot,ilevel,gamma,Jval(1) ! - if (job%verbose>=2) write(out,"(/'Convert the J=0 eigenvec. to contracted representaion ')") + if (job%verbose>=2) write(out,"(/'Convert the J=0 eigenvec. to contracted representation ')") ! call MemoryReport ! @@ -1348,7 +1359,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! iroot = iroot + 1 ! - ! $omp parallel do private(icoeff,irow,ib,iterm,ielem) shared(vec) schedule(dynamic) + !$omp parallel do private(icoeff,irow,ib,iterm,ielem,blacs_row,blacs_col,i_local,j_local) default(shared) schedule(dynamic) do icoeff = 1,dimen ! call infog2l(icoeff,iroot,desc_psi,nprow,npcol,myprow,mypcol,i_local,j_local,blacs_row,blacs_col) @@ -1369,7 +1380,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! enddo - ! $omp end parallel do + !$omp end parallel do ! end do else @@ -1481,30 +1492,45 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOmatelem_split.or.job%iswap(1)<=1) then ! job_is ='Eigen-vib. matrix elements of the rot. kinetic part' - call IOStart(trim(job_is),chkptIO) ! - !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) - !write(chkptIO) 'Start Kinetic part' - call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) - call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) - mpioffset = 0 - call MPI_File_set_size(fileh_w, mpioffset, ierr) - if(mpi_rank .eq. 0) then - call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) - call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) - ! - treat_vibration = .false. - ! - !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) - call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) + mpioffset = 0 + call MPI_File_set_size(fileh_w, mpioffset, ierr) ! - if (job%vib_rot_contr) then - !write(chkptIO) 'vib-rot' - call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) + if(mpi_rank .eq. 0) then + call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call MPI_File_write(fileh_w, 'Start Kinetic part', 18, mpi_character, mpi_status_ignore, ierr) + ! + treat_vibration = .false. + ! + !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) + call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) + ! + if (job%vib_rot_contr) then + !write(chkptIO) 'vib-rot' + call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) + endif + else + mpioffset = 0 + treat_vibration = .false. endif else - mpioffset = 0 + call IOStart(trim(job_is),chkptIO) + ! + open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%kineteigen_file) + write(chkptIO) 'Start Kinetic part' + ! treat_vibration = .false. + if(mpi_rank .eq. 0) then + ! + call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) + ! + if (job%vib_rot_contr) then + write(chkptIO) 'vib-rot' + endif + endif endif ! endif @@ -1549,20 +1575,27 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! task = 'rot' ! - !write(chkptIO) 'g_rot' - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) - call mpi_barrier(MPI_COMM_WORLD, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) - ! - !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) - call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) + call mpi_barrier(MPI_COMM_WORLD, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + ! + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + else + write(chkptIO) 'g_rot' + ! + call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + endif ! else ! task = 'top' ! - !call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) - call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + else + call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + endif ! endif ! @@ -1602,9 +1635,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - ! read(iunit) gmat - call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) - !write (chkptIO) gmat + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) + else + read(iunit) gmat + endif ! endif ! @@ -1635,8 +1670,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - !write (chkptIO) mat_s - call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + else + write (chkptIO) mat_s + endif ! endif ! @@ -1646,11 +1684,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! Reset view to flat file if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then - read_offset = read_offset + 9*dimen*dimen*mpi_real_size + read_offset = read_offset + 9*int(dimen,MPI_OFFSET_KIND)*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) - write_offset = write_offset + 9*Neigenroots*Neigenroots*mpi_real_size + write_offset = write_offset + 9*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) endif @@ -1664,13 +1702,18 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOmatelem_split) then ! task = 'cor' - ! - call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) - ! - !write(chkptIO) 'g_cor' - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) - call MPI_Barrier(MPI_COMM_WORLD, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + if (trim(job%kinetmat_format).eq.'MPIIO') then + ! + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + ! + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) + call MPI_Barrier(MPI_COMM_WORLD, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + else + call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + ! + write(chkptIO) 'g_cor' + endif ! endif ! @@ -1712,8 +1755,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - ! read(iunit) gmat - call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) + else + read(iunit) gmat + endif ! endif ! @@ -1744,8 +1790,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - !write (chkptIO) mat_s - call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + else + write (chkptIO) mat_s + endif ! endif ! @@ -1755,28 +1804,40 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! Reset view to flat file if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then - read_offset = read_offset + 3*dimen*dimen*mpi_real_size + read_offset = read_offset + 3*int(dimen,MPI_OFFSET_KIND)*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) - write_offset = write_offset + 3*Neigenroots*Neigenroots*mpi_real_size + write_offset = write_offset + 3*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) endif ! if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! - !if (.not.job%IOmatelem_split.or.job%iswap(1)==1) write(chkptIO) 'End Kinetic part' - if ((.not.job%IOmatelem_split.or.job%iswap(1)==1).and.(mpi_rank.eq.0)) call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) + if ((.not.job%IOmatelem_split.or.job%iswap(1)==1).and.(mpi_rank.eq.0)) then + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) + else + write(chkptIO) 'End Kinetic part' + endif + endif ! if (.not.job%vib_rot_contr) then - !close(chkptIO,status='keep') - call MPI_File_close(fileh_w, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_close(fileh_w, ierr) + else + close(chkptIO,status='keep') + endif endif ! task = 'end' ! - call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) + else + call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) + endif ! if (allocated(gmat)) deallocate(gmat) call ArrayStop('gmat-fields') @@ -1784,7 +1845,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (job%verbose>=3) write(out,"(' ...done!')") ! endif - call MPI_File_close(fileh, ierr) + ! + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_close(fileh, ierr) + else + close(chkptIO,status='keep') + endif ! ! External field part ! @@ -1813,31 +1879,47 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! filename = job%extFmat_file ! - !open(iunit,form='unformatted',action='read',position='rewind',status='old',file=filename) - ! - call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, fileh, ierr) - ! - call MPI_File_read_all(fileh, buf20, 7, mpi_character, mpi_status_ignore, ierr) - if (buf20(1:7)/='[MPIIO]') then - write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") filename, buf20 - stop 'restore_vib_matric_elemets - Not an MPIIO file' - end if - ! - !read(iunit) buf20 - call MPI_File_read_all(fileh, buf20, 20, mpi_character, mpi_status_ignore, ierr) - if (buf20/='Start external field') then - write (out,"(' restore_vib_matrix_elements ',a,' has bogus header: ',a)") filename,buf20 - stop 'restore_vib_matrix_elements - bogus file format' - end if - ! - !read(iunit) ncontr_t - call MPI_File_read_all(fileh, ncontr_t, 1, mpi_integer, mpi_status_ignore, ierr) - ! - if (bset_contr(1)%Maxcontracts/=ncontr_t) then - write (out,"(' Dipole moment checkpoint file ',a)") filename - write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i8)") bset_contr(1)%Maxcontracts,ncontr_t - stop 'restore_Extvib_matrix_elements - in file - illegal ncontracts ' - end if + if (trim(job%kinetmat_format).eq.'MPIIO') then + ! + call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, fileh, ierr) + ! + call MPI_File_read_all(fileh, buf20, 7, mpi_character, mpi_status_ignore, ierr) + if (buf20(1:7)/='[MPIIO]') then + write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") filename, buf20 + stop 'restore_vib_matric_elemets - Not an MPIIO file' + end if + ! + call MPI_File_read_all(fileh, buf20, 20, mpi_character, mpi_status_ignore, ierr) + if (buf20/='Start external field') then + write (out,"(' restore_vib_matrix_elements ',a,' has bogus header: ',a)") filename,buf20 + stop 'restore_vib_matrix_elements - bogus file format' + end if + ! + call MPI_File_read_all(fileh, ncontr_t, 1, mpi_integer, mpi_status_ignore, ierr) + ! + if (bset_contr(1)%Maxcontracts/=ncontr_t) then + write (out,"(' Dipole moment checkpoint file ',a)") filename + write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i8)") bset_contr(1)%Maxcontracts,ncontr_t + stop 'restore_Extvib_matrix_elements - in file - illegal ncontracts ' + end if + ! + else + open(iunit,form='unformatted',action='read',position='rewind',status='old',file=filename) + ! + read(iunit) buf20 + if (buf20/='Start external field') then + write (out,"(' restore_vib_matrix_elements ',a,' has bogus header: ',a)") filename,buf20 + stop 'restore_vib_matrix_elements - bogus file format' + end if + ! + read(iunit) ncontr_t + if (bset_contr(1)%Maxcontracts/=ncontr_t) then + write (out,"(' Dipole moment checkpoint file ',a)") filename + write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i8)") bset_contr(1)%Maxcontracts,ncontr_t + stop 'restore_Extvib_matrix_elements - in file - illegal ncontracts ' + end if + ! + endif ! endif ! @@ -1848,27 +1930,38 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='external field contracted matrix elements for J=0' call IOStart(trim(job_is),chkptIO) ! - !open(chkptIO,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) - !write(chkptIO) 'Start external field' - call MPI_File_open(mpi_comm_world, job%exteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) - call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) - mpioffset = 0 - call MPI_File_set_size(fileh_w, mpioffset, ierr) - if(mpi_rank.eq.0) then - call MPI_File_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) - call MPI_File_write(fileh_w, 'Start external field', 20, mpi_character, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + ! + call mpi_file_open(mpi_comm_world, job%exteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) + call mpi_file_set_errhandler(fileh_w, mpi_errors_are_fatal) + ! + mpioffset = 0 + call mpi_file_set_size(fileh_w, mpioffset, ierr) + ! + if(mpi_rank.eq.0) then + call mpi_file_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call mpi_file_write(fileh_w, 'start external field', 20, mpi_character, mpi_status_ignore, ierr) + endif + ! + ! store the matrix elements + ! + if(mpi_rank.eq.0) call mpi_file_write(fileh_w, neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) + call mpi_barrier(mpi_comm_world, ierr) + call mpi_file_seek(fileh_w, int(0,mpi_offset_kind), mpi_seek_end) + ! + else + ! + open(chkptio,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) + write(chkptio) 'start external field' + ! + ! store the matrix elements + ! + write(chkptio) neigenroots endif ! - ! store the matrix elements - ! - !write(chkptIO) Neigenroots - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, Neigenroots, 1, mpi_integer, mpi_status_ignore, ierr) - call mpi_barrier(mpi_comm_world, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) - ! endif ! - !if (job%IOextF_divide) close(iunit) + if (trim(job%kinetmat_format).ne.'MPIIO'.and.job%IOextF_divide) close(iunit) ! rootsize = int(ncontr_t*(ncontr_t+1)/2,hik) rootsize2= int(ncontr_t*ncontr_t,hik) @@ -1909,11 +2002,15 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! else ! - !read(iunit) imu_t - call MPI_File_read_all(fileh, imu_t, 1, mpi_integer, mpi_status_ignore, ierr) - ! - !read(iunit) extF_me - call MPI_File_read_all(fileh, extF_me, size(extF_me), mpi_double_precision, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_read_all(fileh, imu_t, 1, mpi_integer, mpi_status_ignore, ierr) + ! + call MPI_File_read_all(fileh, extF_me, size(extF_me), mpi_double_precision, mpi_status_ignore, ierr) + else + read(iunit) imu_t + ! + read(iunit) extF_me + endif ! endif ! @@ -1957,12 +2054,15 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! - !write(chkptIO) imu - !write(chkptIO) mat_s - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) - call MPI_Barrier(mpi_comm_world, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) - call MPI_File_write(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) + call MPI_Barrier(mpi_comm_world, ierr) + call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + call MPI_File_write(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + else + write(chkptIO) imu + write(chkptIO) mat_s + endif ! else ! @@ -1977,12 +2077,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) enddo ! Reset view to flat file if ((.not.job%IOextF_divide) .and. blacs_size.gt.1) then - read_offset = read_offset + (fitting%iparam(2)-fitting%iparam(1)+1)*ncontr_t*ncontr_t*mpi_real_size & + read_offset = read_offset + (fitting%iparam(2)-fitting%iparam(1)+1)*int(ncontr_t,MPI_OFFSET_KIND)*ncontr_t*mpi_real_size & + (fitting%iparam(2)-fitting%iparam(1)+1)*mpi_int_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) - write_offset = write_offset + 3*Neigenroots*Neigenroots*mpi_real_size + write_offset = write_offset + 3*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) endif @@ -1992,15 +2092,22 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide) then ! - !read(iunit) buf20(1:18)double_precision - call MPI_File_read_all(fileh, buf20, 18, mpi_character, mpi_status_ignore, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_read_all(fileh, buf20, 18, mpi_character, mpi_status_ignore, ierr) + else + read(iunit) buf20(1:18) + endif + ! if (buf20(1:18)/='End external field') then write (out,"(' restore_Extvib_matrix_elements ',a,' has bogus footer: ',a)") filename,buf20(1:18) stop 'restore_Extvib_matrix_elements - bogus file format' end if ! - !close(iunit,status='keep') - call MPI_File_close(fileh, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + call MPI_File_close(fileh, ierr) + else + close(iunit,status='keep') + endif ! !job_is ='external field contracted matrix elements for J=0' !call IOStart(trim(job_is),iunit) @@ -2009,10 +2116,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! - !write(chkptIO) 'End external field' - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) - !close(chkptIO,status='keep') - call MPI_File_close(fileh_w, ierr) + if (trim(job%kinetmat_format).eq.'MPIIO') then + if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) + call MPI_File_close(fileh_w, ierr) + else + write(chkptIO) 'End external field' + close(chkptIO,status='keep') + endif ! endif ! @@ -2109,7 +2219,7 @@ subroutine divided_slice_write_mpi(islice,name,suffix,N,field,block_type) ! call MPI_File_write_all(chkptMPIIO, field, size(field), mpi_double_precision, mpi_status_ignore, ierr) ! - offset = offset + N*N*mpi_real_size + offset = offset + N*int(N,MPI_OFFSET_KIND)*mpi_real_size call MPI_File_set_view(chkptMPIIO, offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) ! if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) @@ -2238,7 +2348,7 @@ subroutine divided_slice_read_mpi(islice,name,suffix,N,field,block_type,ierr) call MPI_File_set_view(chkptMPIIO, int(ilen,MPI_OFFSET_KIND), mpi_byte, block_type, "native", MPI_INFO_NULL, ierr) call MPI_File_read_all(chkptMPIIO, field, size(field), mpi_double_precision, mpi_status_ignore, ierr) ! - offset = offset + N*N*mpi_real_size + offset = offset + N*int(N,MPI_OFFSET_KIND)*mpi_real_size call MPI_File_set_view(chkptMPIIO, offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) ! call MPI_File_read_all(chkptMPIIO,buf,ilen,mpi_character,mpi_status_ignore,ierr) @@ -2465,7 +2575,7 @@ subroutine restore_rot_kinetic_matrix_elements(jrot,treat_vibration,kinetic_part read(chkptIO) buf18(1:5) if (buf18(1:5)/='g_cor') then write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,buf18(1:5) - stop 'restore_rot_kinetic_matrix_elements - in file - g_cor missing' + stop 'restore_rot_kinetic_matrix_elements_posix - in file - g_cor missing' end if ! case('end') @@ -2608,7 +2718,6 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ integer(ik),intent(in) :: jrot logical,intent(in) :: treat_vibration character(len=3),intent(in) :: kinetic_part - !integer(ik),intent(inout) :: chkptIO type(MPI_File),intent(in) :: fileh ! integer(ik) :: iclasses,alloc,ilevel,ib,max_deg_size,nclasses,islice @@ -2625,31 +2734,26 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! case('rot','top') ! - job_is ='Vib. matrix elements of the rot. kinetic' - !call IOStart(trim(job_is),chkptIO) + job_is ='Vib. matrix elements of the rot. kinetic [mpi]' ! - !open(chkptIO,form='unformatted',action='read',position='rewind',status='old',file=job%kinetmat_file) - !call MPI_File_Open(mpi_comm_world, 'mpiiofile', mpi_mode_rdonly, mpi_info_null, fileh, ierr) - ! - !read(chkptIO) buf18 call MPI_File_read_all(fileh, buf18, 7, mpi_character, mpi_status_ignore, ierr) if (buf18(1:7)/='[MPIIO]') then write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") job%kinetmat_file,buf18 - stop 'PTcontracted_matelem_class - Not an MPIIO file' + stop 'restore_rot_kinetic_matrix_elements_mpi - Not an MPIIO file' end if + ! call MPI_File_read_all(fileh, buf18, 18, mpi_character, mpi_status_ignore, ierr) if (buf18/='Start Kinetic part') then write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus header: ',a)") job%kinetmat_file,buf18 - stop 'PTcontracted_matelem_class - bogus file format' + stop 'restore_rot_kinetic_matrix_elements_mpi - bogus file format' end if ! - !read(chkptIO) ncontr_t call MPI_File_read_all(fileh, ncontr_t, 1, mpi_integer, mpi_status_ignore, ierr) ! if (bset_contr(1)%Maxcontracts/=ncontr_t) then write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i0)") bset_contr(1)%Maxcontracts,ncontr_t - stop 'PTcontracted_matelem_class - in file - illegal nroots ' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - illegal nroots ' end if ! rootsize = bset_contr(1)%Maxcontracts*(bset_contr(1)%Maxcontracts+1)/2 @@ -2661,14 +2765,12 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ allocate (imat_t(0:nclasses,ncontr_t),stat=alloc) call ArrayStart('contractive_space',alloc,size(imat_t),kind(imat_t)) ! - !read(chkptIO) buf18(1:10) call MPI_File_read_all(fileh, buf18, 10, mpi_character, mpi_status_ignore, ierr) if (buf18(1:10)/='icontr_cnu') then write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,buf18(1:10) - stop 'restore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - icontr_cnu missing' end if ! - !read(chkptIO) imat_t(0:nclasses,1:ncontr_t) call MPI_File_read_all(fileh, imat_t, (nclasses+1)*ncontr_t, mpi_integer, mpi_status_ignore, ierr) ! if (job%vib_rot_contr) then @@ -2679,14 +2781,12 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! endif ! - !read(chkptIO) buf18(1:11) call MPI_File_read_all(fileh, buf18, 11, mpi_character, mpi_status_ignore, ierr) if (buf18(1:11)/='icontr_ideg') then write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") trim(job%kinetmat_file),buf18(1:11) - stop 'restore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - icontr_ideg missing' end if ! - !read(chkptIO) imat_t(0:nclasses,1:ncontr_t) call MPI_File_read_all(fileh, imat_t, (nclasses+1)*ncontr_t, mpi_integer, mpi_status_ignore, ierr) ! deallocate(imat_t) @@ -2697,7 +2797,6 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! if (trim(kinetic_part)=='rot') then ! - !read(chkptIO) buf18(1:4) call MPI_File_read_all(fileh, buf18, 5, mpi_character, mpi_status_ignore, ierr) if (buf18(1:5)/='g_rot') then write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") trim(job%kinetmat_file),buf18(1:5) @@ -2706,19 +2805,12 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ write (out,"(a,a)") & ' Most likely the split chk-points are supposed to be used.',& 'Re-do MATELEM SAVE or use SPLIT in MATELEM !' - stop 'restore_rot_kinetic_matrix_elements - in file - g_rot missing' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - g_rot missing' end if ! endif ! if (job%vib_rot_contr) then - ! - !inquire(iolength=rec_len) f_t - !rec_len = rec_len*ncontr*PT%max_deg_size - !! - !do islice = 1,9+3*PT%Nmodes - ! call divided_slice_open_vib_rot(islice,rec_len,job%matelem_suffix) - !enddo ! call find_groundstate_icontr(bset_contr(1)%nclasses) ! @@ -2726,11 +2818,10 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! case('cor') ! - !read(chkptIO) buf18(1:5) call MPI_File_read_all(fileh, buf18, 5, mpi_character, mpi_status_ignore, ierr) if (buf18(1:5)/='g_cor') then write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,buf18(1:5) - stop 'restore_rot_kinetic_matrix_elements - in file - g_cor missing' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - g_cor missing' end if ! case('end') @@ -2743,16 +2834,12 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ ! endif ! - !read(chkptIO) buf18(1:4) call MPI_File_read_all(fileh, buf18, 4, mpi_character, mpi_status_ignore, ierr) if (buf18(1:4)/='hvib'.and.buf18(1:3)/='End'.and.buf18(1:4)/='vib-') then write (out,"(' Vib. kinetic checkpoint file ',a,': hvib, vib-rot, End is missing ',a)") job%kinetmat_file,buf18(1:4) - stop 'restore_rot_kinetic_matrix_elements - in file - hvib or End missing' + stop 'restore_rot_kinetic_matrix_elements_mpi - in file - hvib or End missing' end if ! - !close(chkptIO,status='keep') - !call MPI_File_close(fileh, ierr) - ! end select ! contains diff --git a/trove.f90 b/trove.f90 index 6b89f25..bc65fd1 100644 --- a/trove.f90 +++ b/trove.f90 @@ -40,6 +40,7 @@ subroutine ptmain ! Begin with constants intitialization ! call TimerStart('TROVE') + call co_init_comms() ! call accuracyInitialize ! @@ -180,9 +181,9 @@ subroutine ptmain ! ! Convert the J=0 basis set and mat.elements to the contracted represent. ! - call co_init_comms() if (action%convert_vibme) then call TRconvert_matel_j0_eigen(j) + call co_finalize_comms() return endif ! @@ -204,7 +205,7 @@ subroutine ptmain ! ! convert to j=0 representation as part of the first step j=0 calculation ! - if (mpi_rank.eq.0 .and. job%convert_model_j0) then + if (job%convert_model_j0) then ! call TRconvert_repres_J0_to_contr(j) call TRconvert_matel_j0_eigen(j) From e37d217c557c65d37997b319b80736360fde107a Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Thu, 1 Aug 2019 12:39:04 +0100 Subject: [PATCH 19/79] gfortran fixes --- mpi_aux.f90 | 2 +- perturbation.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 98b6708..d865757 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -78,7 +78,7 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) call DESCINIT(descr, dimy, dimx, MB, NB, 0, 0, blacs_ctxt, max(MLOC,1), ierr) allocate(smat(MLOC,NLOC), stat=allocinfo) - if(allocinfo) return + if(allocinfo.ne.0) return if (present(mpi_type)) then global_size = (/dimy, dimx/) diff --git a/perturbation.f90 b/perturbation.f90 index 86e79ff..9477d5c 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -16560,7 +16560,7 @@ subroutine divided_slice_open_mpi(islice,chkptIO,name,suffix) filename = trim(suffix)//trim(adjustl(jchar))//'.chk' ! call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, chkptMPIIO, ierr) - if (ierr) then + if (ierr.ne.0) then if(mpi_rank.eq.0) write(out,"('divided_slice_open-error: The split-file ',a,' does not exist')") trim(filename) stop 'divided_slice_open-error: The split-file does not exist' endif From 0206daeb83037f06e4f5101dcf3a8c9e4ee2f19a Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 9 Sep 2019 15:57:26 +0100 Subject: [PATCH 20/79] Bugfix: only call co_sum if select_gamma(isym) is true --- perturbation.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perturbation.f90 b/perturbation.f90 index 9477d5c..153af2c 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7339,7 +7339,8 @@ subroutine PThamiltonian_contract(jrot) endif ! do isym = 1,sym%Nrepresen - call co_sum(smat(isym)%coeffs,0) + if (.not.job%select_gamma(isym)) cycle + call co_sum(smat(isym)%coeffs) enddo call TimerStop('Calculating the Hamiltonian matrix') ! From 13af4b46b3c4f1b7a34dc5911972b9f28b45d627 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 9 Sep 2019 15:59:23 +0100 Subject: [PATCH 21/79] Workaround: MKL's scalapack crashes with (correct) input on psi for large cases, work around by transposing manually and passing as 'T' instead of 'N' --- tran.f90 | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tran.f90 b/tran.f90 index b76b4c0..229ce6c 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1220,7 +1220,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) character(len=20) :: buf20 integer(ik) :: ncontr_t,rootsize_t,junit,iterm1=0,iterm2=1e6,islice,Nterms,iterm,icoeff integer(hik) :: matsize2,matsize,rootsize,rootsize2 - real(rk),allocatable :: gmat(:,:),psi(:,:) + real(rk),allocatable :: gmat(:,:),psi(:,:),psi_t(:,:) real(rk),allocatable :: mat_s(:,:),mat_t(:,:) integer(ik),allocatable :: ijterm(:,:) double precision,parameter :: alpha = 1.0d0,beta=0.0d0 @@ -1277,17 +1277,21 @@ subroutine TRconvert_matel_j0_eigen(jrot) matsize = int(dimen*Neigenroots,hik) ! if (job%verbose>=3) write(out,"(/' Allocate two matrices of ',i8,'x',i8,' = ',i0,' elements.')") & - Neigenroots,Neigenroots,matsize + dimen,Neigenroots,matsize ! if (blacs_size.eq.1) then allocate(psi(dimen,Neigenroots),mat_t(Neigenroots,dimen),stat=info) ! call ArrayStart('psi',info,1,kind(psi),matsize) + call ArrayStart('psi_t',info,1,kind(psi_t),matsize) call ArrayStart('mat_t',info,1,kind(mat_t),matsize) else call co_block_type_init(psi, Neigenroots, dimen, desc_psi, info) call ArrayStart('psi',info,1,kind(psi),int(size(psi),hik)) - ! + + !shape(psi_t) == shape(psi^T) == shape(mat_t) + call co_block_type_init(psi_t, dimen, Neigenroots, desc_mat_t, info) + call ArrayStart('psi_t',info,1,kind(mat_t),int(size(mat_t),hik)) call co_block_type_init(mat_t, dimen, Neigenroots, desc_mat_t, info) call ArrayStart('mat_t',info,1,kind(mat_t),int(size(mat_t),hik)) endif @@ -1478,6 +1482,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! !if (job%verbose>=5) write(out,"(/'Maximal number of non-zero values after vector compression = ',i,' out of ',i/)") cdimenmax,dimen ! + ! TODO TEMP intel bugaround - explicitly transpose psi into psi_t + if (blacs_size .gt. 1) then + write(out,*) "Explicitly transposing psi into psi_t" + call pdtran(Neigenroots, dimen, 1.0d0, psi, 1, 1, desc_psi, 0.0d0, psi_t, & + 1, 1, desc_mat_t) + endif + ! if (job%verbose>=3) write(out,"(' ...done!')") ! !if (job%verbose>=5) call TimerStop('Prepare fcoeff for J0-convertion') @@ -1647,8 +1658,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (blacs_size.gt.1) then call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) - call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& - psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& gmat,dimen,beta,mat_t,Neigenroots) @@ -1766,8 +1777,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (blacs_size.gt.1) then call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) - call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& - psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& gmat,dimen,beta,mat_t,Neigenroots) @@ -1886,7 +1897,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_read_all(fileh, buf20, 7, mpi_character, mpi_status_ignore, ierr) if (buf20(1:7)/='[MPIIO]') then write (out,"(' Vib. kinetic checkpoint file ',a,' is not an MPIIO file: ',a)") filename, buf20 - stop 'restore_vib_matric_elemets - Not an MPIIO file' + stop 'restore_vib_matric_elements - Not an MPIIO file' end if ! call MPI_File_read_all(fileh, buf20, 20, mpi_character, mpi_status_ignore, ierr) @@ -2017,8 +2028,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) if(blacs_size.gt.1) then call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& extF_me,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) - call pdgemm('N','N',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& - psi,1,1,desc_psi,beta,mat_s,1,1,desc_mat_s) + call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& + psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& extF_me,dimen,beta,mat_t,Neigenroots) From a8ed85fccf03637e488653129f486e92614d4f1b Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 9 Sep 2019 16:51:24 +0100 Subject: [PATCH 22/79] Clean up redundant statements + minor makefile fix --- makefile | 6 ++---- mpi_aux.f90 | 8 -------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/makefile b/makefile index 957aebd..c7bb7f3 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,6 @@ pot_user = pot_ch4 PLAT = #PLAT = _2205_i17 -#FOR = ifort FOR = mpif90 FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback #FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 @@ -20,8 +19,6 @@ FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback #ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a -#LAPACK = -L/usr/local/software/spack/spack-0.11.2/opt/spack/linux-rhel7-x86_64/gcc-5.4.0/openblas-0.2.20-gbzlk5wei7fsojje2fiwj7w5wssikb73/lib -lopenblas -#LAPACK = -mkl=parallel -lscalapack LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 #LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl @@ -85,7 +82,8 @@ pot_c2h4.o: accuracy.o moltype.o pot_c3h6.o: accuracy.o moltype.o pot_abcd.o: accuracy.o moltype.o lapack.o -mpi_aux.o: timer.o +input.o: accuracy.o +mpi_aux.o: accuracy.o timer.o clean: rm -f *.mod *.o diff --git a/mpi_aux.f90 b/mpi_aux.f90 index d865757..9e4081d 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -92,9 +92,6 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) end subroutine co_block_type_init subroutine co_sum_double(x, root_process) - use accuracy - - implicit none real(rk), intent(inout), dimension(:,:) :: x integer, optional :: root_process @@ -259,9 +256,6 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) end subroutine co_init_distr subroutine co_distr_data(x, tmp, blocksize, lb, ub) - use accuracy - - implicit none real(rk),dimension(:,lb:),intent(inout) :: x real(rk),dimension(:,:,:),intent(inout) :: tmp @@ -311,7 +305,6 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) end subroutine co_distr_data subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) - use accuracy real(rk),dimension(:,lb:),intent(out) :: x integer,intent(in) :: longdim, lb, ub @@ -341,7 +334,6 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) end subroutine co_read_matrix_distr subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) - use accuracy real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub From ab66ff799b383a8387fb988f58f4b7e5bf7f6755 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 9 Sep 2019 16:52:23 +0100 Subject: [PATCH 23/79] [input.f90] output 'echo' statements only on master process --- input.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/input.f90 b/input.f90 index f930b70..3c1fb88 100644 --- a/input.f90 +++ b/input.f90 @@ -1,6 +1,6 @@ MODULE input -!use accuracy +use accuracy ! Fortran90 input parsing module ! @@ -247,7 +247,7 @@ SUBROUTINE read_line(eof,inunit) ! Find last non-blank character 10 last=verify(char,space//tab,back=.true.) - if (echo) print "(a)", char(m:last) + if (echo) write(out,"(a)") char(m:last) ! Look for concatenation string if (lc .gt. 0 .and. last .ge. lc) then more=(char(last-lc+1:last) .eq. concat) From 2f3edf81137d27830fc79ea22c12041907fb8c0a Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Tue, 10 Sep 2019 10:23:20 +0100 Subject: [PATCH 24/79] [perturbation.f90]Fix small mistake in co_sum call (allreduce when reduce-to-root is fine) --- perturbation.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perturbation.f90 b/perturbation.f90 index 153af2c..55f02f3 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7340,7 +7340,7 @@ subroutine PThamiltonian_contract(jrot) ! do isym = 1,sym%Nrepresen if (.not.job%select_gamma(isym)) cycle - call co_sum(smat(isym)%coeffs) + call co_sum(smat(isym)%coeffs,0) enddo call TimerStop('Calculating the Hamiltonian matrix') ! From 88913fdb0e39b3c7dd4d72a797b50c14ded4ee9c Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Tue, 12 Nov 2019 15:18:16 +0000 Subject: [PATCH 25/79] Rough fix to avoid 'Non-diagonal element' issue --- mpi_aux.f90 | 27 +++++++++++- perturbation.f90 | 112 ++++++++++++++++++++++++++++------------------- tran.f90 | 3 ++ 3 files changed, 94 insertions(+), 48 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 9e4081d..fd52991 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -15,7 +15,8 @@ module mpi_aux public mpi_real_size, mpi_int_size interface co_sum - module procedure :: co_sum_double + module procedure :: co_sum_double_1d + module procedure :: co_sum_double_2d end interface integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv @@ -91,7 +92,29 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) end subroutine co_block_type_init - subroutine co_sum_double(x, root_process) + subroutine co_sum_double_1d(x, root_process) + + real(rk), intent(inout), dimension(:) :: x + integer, optional :: root_process + + if (comm_size.eq.1) return + call TimerStart('co_sum_double') + + if (present(root_process)) then + + if (mpi_rank .eq. 0) then + call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + else + call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + endif + else + call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, mpi_comm_world) + end if + + call TimerStop('co_sum_double') + end subroutine + + subroutine co_sum_double_2d(x, root_process) real(rk), intent(inout), dimension(:,:) :: x integer, optional :: root_process diff --git a/perturbation.f90 b/perturbation.f90 index 55f02f3..dde84b9 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -6919,14 +6919,14 @@ subroutine PThamiltonian_contract(jrot) if (job%verbose>=5) write(out,"(' N Arrays of ',f12.5,'Gb each will be allocated (N is the number of processors)')")& real(sym%Nrepresen,rk)*real(PT%max_deg_size,rk)*real(max_dim,rk)*real(rk,rk)/1024.0_rk**3 ! - !$omp parallel private(mat_t,alloc_p) + ! $omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) if (alloc_p/=0) then write(out,"('PThamiltonian_contract: mat_t - out of memory')") stop 'PThamiltonian_contract - mat_t out of memory' endif ! - !$omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) + ! $omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) do irow = 1,dimen ! ! ithread = omp_get_thread_num() @@ -6963,10 +6963,10 @@ subroutine PThamiltonian_contract(jrot) enddo ! enddo - !$omp enddo + ! $omp enddo ! deallocate (mat_t) - !$omp end parallel + ! $omp end parallel ! if (associated(grot)) deallocate(grot) if (associated(gcor)) deallocate(gcor) @@ -7124,14 +7124,14 @@ subroutine PThamiltonian_contract(jrot) call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) endif ! - !$omp parallel private(mat_t,alloc_p) + ! $omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) if (alloc_p/=0) then write(out,"('PThamiltonian_contract: mat_t - out of memory')") stop 'PThamiltonian_contract - mat_t out of memory' endif ! - !$omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) + ! $omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) do irow = 1,dimen ! ! ithread = omp_get_thread_num() @@ -7170,10 +7170,10 @@ subroutine PThamiltonian_contract(jrot) enddo ! enddo - !$omp enddo + ! $omp enddo ! deallocate (mat_t) - !$omp end parallel + ! $omp end parallel ! do k_i = 1,3 do k_j = 1,3 @@ -7195,14 +7195,14 @@ subroutine PThamiltonian_contract(jrot) call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) endif ! - !$omp parallel private(mat_t,alloc_p) + ! $omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) if (alloc_p/=0) then write(out,"('PThamiltonian_contract: mat_t - out of memory')") stop 'PThamiltonian_contract - mat_t out of memory' endif ! - !$omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) + ! $omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) do irow = 1,dimen ! ! ithread = 1 @@ -7243,10 +7243,10 @@ subroutine PThamiltonian_contract(jrot) enddo ! enddo - !$omp enddo + ! $omp enddo ! deallocate (mat_t) - !$omp end parallel + ! $omp end parallel ! do k_i = 1,3 deallocate(gcor(k_i)%me) @@ -7272,14 +7272,14 @@ subroutine PThamiltonian_contract(jrot) call PTrestore_rot_kinetic_matrix_elements(jrot,task,iunit,dimen,ncontr,maxcontr) endif ! - !$omp parallel private(mat_t,alloc_p) + ! $omp parallel private(mat_t,alloc_p) allocate (mat_t(sym%Nrepresen,PT%max_deg_size,max_dim),stat=alloc_p) if (alloc_p/=0) then write(out,"('PThamiltonian_contract: mat_t - out of memory')") stop 'PThamiltonian_contract - mat_t out of memory' endif ! - !$omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) + ! $omp do private(irow,isym,dimen_s,iterm,ielem,k_i,dimen_row,istart,iend) schedule(static) do irow = 1,dimen ! ! ithread = 1 @@ -7320,10 +7320,10 @@ subroutine PThamiltonian_contract(jrot) enddo ! enddo - !$omp enddo + ! $omp enddo ! deallocate (mat_t) - !$omp end parallel + ! $omp end parallel ! deallocate(hvib%me) ! @@ -7338,14 +7338,18 @@ subroutine PThamiltonian_contract(jrot) ! endif ! - do isym = 1,sym%Nrepresen - if (.not.job%select_gamma(isym)) cycle - call co_sum(smat(isym)%coeffs,0) - enddo + !do isym = 1,sym%Nrepresen + ! if (.not.job%select_gamma(isym)) cycle + ! call co_sum(smat(isym)%coeffs,0) + !enddo call TimerStop('Calculating the Hamiltonian matrix') ! if (job%verbose>=4) write(out,"('...done!')") if (mpi_rank.eq.0) then!mpiio + do isym = 1,sym%Nrepresen + if (.not.job%select_gamma(isym)) cycle + write(6,*) "Todays sum:", sum(smat(isym)%coeffs) + enddo ! Correction for the case we do not compute the vibrational part of the ! Hamiltonian: ! @@ -7852,7 +7856,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a)") job%kinetmat_file if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree ',2i9)") PT%Maxcontracts,ncontr call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal nroots ' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - illegal nroots ' end if rootsize = int(ncontr*(ncontr+1)/2,hik) @@ -7871,7 +7875,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:10)/='icontr_cnu') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size @@ -7881,7 +7885,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:11)/='icontr_ideg') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size @@ -7905,7 +7909,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:10)/='icontr_cnu') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_cnu is missing ',a)") job%kinetmat_file,readbuf(1:10) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - icontr_cnu missing' end if call MPI_File_read_all(fileh, PT%icontr_cnu, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) @@ -7914,7 +7918,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:11)/='icontr_ideg') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': icontr_ideg is missing ',a)") job%kinetmat_file,readbuf(1:11) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - icontr_ideg missing' end if call MPI_File_read_all(fileh, PT%icontr_ideg, (PT%Nclasses+1)*ncontr, mpi_integer, mpi_status_ignore, ierr) @@ -7927,7 +7931,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:7)/='vib-rot') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': label vib-rot is missing ',a)") job%kinetmat_file,readbuf(1:7) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - vib-rot missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - vib-rot missing' end if call close_chkptfile_mpi(fileh) @@ -7956,7 +7960,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (maxcontr>ncontr) then if (mpi_rank .eq. 0) write (out,"(' Actual and stored basis sizes at J=0 do not agree (maxcontr,ncontr) ',2i8)") maxcontr,ncontr call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - illegal ncontr ' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - illegal ncontr ' end if case('rot') ! @@ -7973,7 +7977,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:5)/='g_rot') then if(mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! allocate(grot(3,3),stat=ierr) @@ -8036,7 +8040,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:5)/='g_cor') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! allocate(gcor(3),stat=ierr) @@ -8095,7 +8099,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:5)/='g_rot') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_rot is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - g_rot missing' end if ! file_offset = 9*int(ncontr,MPI_OFFSET_KIND)*ncontr*mpi_real_size @@ -8105,7 +8109,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:5)/='g_cor') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,': g_cor is missing ',a)") job%kinetmat_file,readbuf(1:5) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - g_cor missing' end if ! file_offset = 3*int(ncontr,MPI_OFFSET_KIND)*ncontr*mpi_real_size @@ -8126,7 +8130,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & write (out,"(' Re-do MATELEM SAVE SPLIT or use MATELEM SPLIT READ')") endif call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - in file - hvib or End missing' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - in file - hvib or End missing' end if ! !allocate(mat_(maxcontr,maxcontr),stat=ierr) @@ -8147,7 +8151,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & if (readbuf(1:16)/='End Kinetic part') then if (mpi_rank .eq. 0) write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,readbuf(1:16) call mpi_barrier(mpi_comm_world, ierr) - stop 'PTrestore_rot_kinetic_matrix_elements - bogus file format' + stop 'MPI_PTrestore_rot_kinetic_matrix_elements - bogus file format' end if ! call close_chkptfile_mpi(fileh) @@ -8881,7 +8885,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements(jrot,task,chkptIO,dimen,ncontr, ! read(chkptIO) buf18(1:4) if (buf18(1:4)/='hvib') then - write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,buf18(1:16) + write (out,"(' Vib. kinetic checkpoint file ',a,' has bogus footer: ',a)") job%kinetmat_file,buf18(1:4) stop 'PTrestore_rot_kinetic_matrix_elements - bogus file format' end if ! @@ -9037,7 +9041,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr real(rk) :: hcontr(PT%max_deg_size,PT%max_deg_size) real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) - logical :: escape + !logical :: escape ! !call TimerStart('Symmetrized Hamiltonian - one column') ! @@ -9048,7 +9052,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di isize = PT%Index_deg(irow)%size1 ! do jrow = 1,irow - escape = .false. + !escape = .false. ! if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle ! @@ -9077,16 +9081,20 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! Matrix elements ! if (jcontr .lt. co_startdim .or. jcontr .gt. co_enddim) then - escape = .true. - exit + !write(*,*) "ESCAPE:", jcontr, co_startdim, co_enddim + !escape = .true. + hcontr(ideg,jdeg) = 0.0_rk + !exit + else + hcontr(ideg,jdeg) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) endif - hcontr(ideg,jdeg) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) ! enddo ! enddo - if (escape) cycle + call co_sum(hcontr) + !if (escape) cycle ! do isym = 1,sym%Nrepresen ! @@ -10688,7 +10696,8 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_root ! endif ! - if (gamma==1) write(out,"(/'Zero-point-energy is ',f18.6)") ZPE + if (gamma==1) write(6,"(/'Todays Zero-point-energy is ',f18.6)") ZPE + if (gamma==1) write(6,*) energy ! if (trim(job%IOeigen_action)=='SAVE'.or.trim(job%IOeigen_action)=='APPEND') then ! @@ -15384,14 +15393,20 @@ subroutine PTcontracted_matelem_class(jrot) job_is ='Vib. matrix elements of the rot. kinetic part' if (trim(job%kinetmat_format).eq.'MPIIO') then call MPI_File_open(mpi_comm_world, job%kinetmat_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) + call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) + mpioffset=0 + call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) - if (mpi_rank.eq.0) then !AT - call TimerStart('mpiiosingle') !AT + + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_CUR, ierr) + call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) + if (mpi_rank.eq.0) then !AT + call TimerStart('mpiiosingle') !AT ! ! store the bookkeeping information about the contr. basis set ! @@ -15399,6 +15414,8 @@ subroutine PTcontracted_matelem_class(jrot) call TimerStop('mpiiosingle') !AT endif + call MPI_Barrier(mpi_comm_world, ierr) + call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END, ierr) else call IOStart(trim(job_is),chkptIO) ! @@ -15723,6 +15740,7 @@ subroutine PTcontracted_matelem_class(jrot) do jcoeff=((b-1)*mdimen_p)+1,b*mdimen_p if (jcoeff .gt. PT%Maxcontracts) cycle grot_t(jcoeff,icoeff) = grot_t(jcoeff,icoeff) + hrot_t(jcoeff,icoeff) + !write(6,*) jcoeff, icoeff, grot_t(jcoeff,icoeff) enddo enddo !$omp end parallel do @@ -15733,11 +15751,13 @@ subroutine PTcontracted_matelem_class(jrot) ! call co_distr_data(grot_t, recvbuf, mdimen_p, startdim, enddim) ! + !$omp parallel do private(icoeff,jcoeff) shared(grot_t) schedule(static) do icoeff=startdim,enddim do jcoeff=1,icoeff-1 grot_t(jcoeff,icoeff) = -1*grot_t(jcoeff,icoeff) enddo enddo + !$omp end parallel do ! if (job%IOmatelem_divide) then ! @@ -15749,9 +15769,9 @@ subroutine PTcontracted_matelem_class(jrot) ! else ! - !$omp parallel do private(icoeff) shared(gcor_t) schedule(dynamic) + !$omp parallel do private(icoeff) shared(gcor_t,grot_t) schedule(static) do icoeff=startdim,enddim - gcor_t(icoeff,:) = gcor_t(icoeff,:)+grot_t(icoeff,:) + gcor_t(:,icoeff) = gcor_t(:,icoeff)+grot_t(:,icoeff) enddo !$omp end parallel do ! @@ -16163,7 +16183,7 @@ subroutine PTcontracted_matelem_class(jrot) call mpi_barrier(mpi_comm_world, ierr) if(mpi_rank.eq.0) then call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'End Kinetic Part',16,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write(chkptMPIIO,'End Kinetic part',16,mpi_character,mpi_status_ignore,ierr) endif call MPI_File_close(chkptMPIIO, ierr) else diff --git a/tran.f90 b/tran.f90 index 229ce6c..3321dab 100644 --- a/tran.f90 +++ b/tran.f90 @@ -902,6 +902,9 @@ subroutine read_eigenval(njval, jval, error) write(out, '(a)') '...done!' end if ! + ! Keep going if all is well + cycle + ! Error condition (file could not be opened) 16 continue if (present(error)) error = 1 ! From 696463e93a45108f8d58ec150376fc493b81d231 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Wed, 11 Mar 2020 14:47:39 +0000 Subject: [PATCH 26/79] Improve co_sum workaround by calling reduction operation only once per call to symm_mat_element_vector_k. Old method caused massive (3x) slowdown, now eliminated. --- mpi_aux.f90 | 28 +--------- perturbation.f90 | 140 ++++++++++++++++++++++++++--------------------- tran.f90 | 3 - 3 files changed, 82 insertions(+), 89 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index fd52991..b6cda2e 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -15,8 +15,7 @@ module mpi_aux public mpi_real_size, mpi_int_size interface co_sum - module procedure :: co_sum_double_1d - module procedure :: co_sum_double_2d + module procedure :: co_sum_double end interface integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv @@ -92,9 +91,9 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) end subroutine co_block_type_init - subroutine co_sum_double_1d(x, root_process) + subroutine co_sum_double(x, root_process) - real(rk), intent(inout), dimension(:) :: x + real(rk), intent(inout), dimension(..) :: x integer, optional :: root_process if (comm_size.eq.1) return @@ -114,27 +113,6 @@ subroutine co_sum_double_1d(x, root_process) call TimerStop('co_sum_double') end subroutine - subroutine co_sum_double_2d(x, root_process) - - real(rk), intent(inout), dimension(:,:) :: x - integer, optional :: root_process - - if (comm_size.eq.1) return - call TimerStart('co_sum_double') - - if (present(root_process)) then - - if (mpi_rank .eq. 0) then - call mpi_reduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) - else - call mpi_reduce(x, x, size(x), mpi_double_precision, mpi_sum, 0, mpi_comm_world) - endif - else - call mpi_allreduce(mpi_in_place, x, size(x), mpi_double_precision, mpi_sum, mpi_comm_world) - end if - - call TimerStop('co_sum_double') - end subroutine subroutine co_init_comms() integer :: ierr diff --git a/perturbation.f90 b/perturbation.f90 index dde84b9..a218982 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7348,7 +7348,6 @@ subroutine PThamiltonian_contract(jrot) if (mpi_rank.eq.0) then!mpiio do isym = 1,sym%Nrepresen if (.not.job%select_gamma(isym)) cycle - write(6,*) "Todays sum:", sum(smat(isym)%coeffs) enddo ! Correction for the case we do not compute the vibrational part of the ! Hamiltonian: @@ -9028,7 +9027,7 @@ end subroutine PTrestore_rot_kinetic_matrix_elements ! recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_diagonalization) use mpi_aux - + ! integer(ik),intent(in) :: jrot,irow,ijterm(:,:) real(rk),external :: func real(rk),intent(out) :: mat_t(:,:,:) @@ -9039,63 +9038,77 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di real(rk) :: mat_elem integer(ik) :: isize,jsize,ielem,jelem,k_i,k_j,tau_i,tau_j integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr - real(rk) :: hcontr(PT%max_deg_size,PT%max_deg_size) real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) - !logical :: escape - ! - !call TimerStart('Symmetrized Hamiltonian - one column') - ! - mat_t = 0 - ! - cnu_i(:) = PT%contractive_space(:,irow) - ! - isize = PT%Index_deg(irow)%size1 - ! - do jrow = 1,irow - !escape = .false. - ! - if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle - ! - cnu_j(:) = PT%contractive_space(:,jrow) - ! - jsize = PT%Index_deg(jrow)%size1 - ! - do ideg = 1,isize - ! - deg_i(:) = PT%Index_deg(irow)%icoeffs(:,ideg) - ! - do jdeg = 1,jsize - ! - deg_j(:) = PT%Index_deg(jrow)%icoeffs(:,jdeg) - ! - !iroot = contr(iclass)%iroot(cnu_i(iclass),deg_i(iclass)) - !jroot = contr(iclass)%iroot(cnu_j(iclass),deg_j(iclass)) - ! - icontr = PT%icase2icontr(irow,ideg) - jcontr = PT%icase2icontr(jrow,jdeg) - k_i = PT%rot_index(cnu_i(0),deg_i(0))%k - k_j = PT%rot_index(cnu_j(0),deg_j(0))%k - tau_i = PT%rot_index(cnu_i(0),deg_i(0))%tau - tau_j = PT%rot_index(cnu_j(0),deg_j(0))%tau - ! - ! Matrix elements - ! - if (jcontr .lt. co_startdim .or. jcontr .gt. co_enddim) then - !write(*,*) "ESCAPE:", jcontr, co_startdim, co_enddim - !escape = .true. - hcontr(ideg,jdeg) = 0.0_rk - !exit - else - hcontr(ideg,jdeg) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) - endif + ! + real(rk), dimension(:,:,:), allocatable :: hcontr + ! + !call TimerStart('Symmetrized Hamiltonian - one column') + ! + mat_t = 0 + ! + cnu_i(:) = PT%contractive_space(:,irow) + ! + isize = PT%Index_deg(irow)%size1 + ! + ! AT: hcontr is now an array of irow * PT%max_deg_size^2. This way we can calculate all hcontr values in advance, + ! collect them to root, then run the matelem calculation loop. + ! The reduction to root is necessary as we are doing apparently random access over a distributed matrix. + allocate(hcontr(PT%max_deg_size,PT%max_deg_size,irow)) + ! + do jrow = 1,irow + ! + if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle + ! + cnu_j(:) = PT%contractive_space(:,jrow) + ! + jsize = PT%Index_deg(jrow)%size1 + ! + do ideg = 1,isize + ! + deg_i(:) = PT%Index_deg(irow)%icoeffs(:,ideg) + ! + do jdeg = 1,jsize + !hcontr(ideg,jdeg) = 0.0_rk + ! + deg_j(:) = PT%Index_deg(jrow)%icoeffs(:,jdeg) + ! + !iroot = contr(iclass)%iroot(cnu_i(iclass),deg_i(iclass)) + !jroot = contr(iclass)%iroot(cnu_j(iclass),deg_j(iclass)) + ! + icontr = PT%icase2icontr(irow,ideg) + jcontr = PT%icase2icontr(jrow,jdeg) + k_i = PT%rot_index(cnu_i(0),deg_i(0))%k + k_j = PT%rot_index(cnu_j(0),deg_j(0))%k + tau_i = PT%rot_index(cnu_i(0),deg_i(0))%tau + tau_j = PT%rot_index(cnu_j(0),deg_j(0))%tau + ! + ! Matrix elements + ! + if (jcontr .lt. co_startdim .or. jcontr .gt. co_enddim) then + hcontr(ideg,jdeg,jrow) = 0.0_rk + else + hcontr(ideg,jdeg,jrow) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) + endif - ! - enddo - ! - enddo - call co_sum(hcontr) - !if (escape) cycle - ! + ! + enddo + ! + enddo + end do + ! + ! Collect all pre-calculated hcontr values to MPI root. Non-local values have been initialised to 0 so it's safe to just do + ! MPI_SUM. + call co_sum(hcontr, 0) + !if (mpi_rank .eq. 0) then + ! call mpi_reduce(mpi_in_place, hcontr, size(hcontr), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + !else + ! call mpi_reduce(hcontr, hcontr, size(hcontr), mpi_double_precision, mpi_sum, 0, mpi_comm_world) + !end if + ! + ! We could do an allreduce above then distribute this loop, but all subsequent calculation is serialised so far. + ! TODO future work? + if (mpi_rank .eq. 0) then + do jrow=1,irow do isym = 1,sym%Nrepresen ! iterm = ijterm(irow,isym) @@ -9112,7 +9125,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! vec_j(1:jsize) = PT%irr(jsym)%repres(jterm+jelem,1,1:jsize) ! - vec_j(1:isize) = matmul(hcontr(1:isize,1:jsize),vec_j(1:jsize)) + vec_j(1:isize) = matmul(hcontr(1:isize,1:jsize,jrow),vec_j(1:jsize)) ! mat_elem = dot_product(vec_i(1:isize),vec_j(1:isize)) ! @@ -9181,9 +9194,12 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di enddo ! enddo - ! - !call TimerStop('Symmetrized Hamiltonian - one column') - ! + endif + ! + deallocate(hcontr) + ! + !call TimerStop('Symmetrized Hamiltonian - one column') + ! end subroutine symm_mat_element_vector_k @@ -16509,8 +16525,10 @@ subroutine write_divided_slice_mpi(islice,name,suffix,N,field) offset = 0 call MPI_File_open(mpi_comm_world, filename, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) call MPI_File_set_size(chkptMPIIO, offset, ierr) + call mpi_barrier(mpi_comm_world, ierr) ! if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) + call mpi_barrier(mpi_comm_world, ierr) ! call co_write_matrix_distr(field, N, co_startdim, co_enddim,chkptMPIIO) ! diff --git a/tran.f90 b/tran.f90 index 3321dab..c612366 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1451,7 +1451,6 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! enddo else - write(*,*) "TODO: This info2gl loop needs to be verified for correctness@TRAN.f90" allocate(vec(dimen),stat = info) ! do ilevel = 1,Neigenlevels @@ -1519,11 +1518,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! treat_vibration = .false. ! - !call PTstore_icontr_cnu(Neigenroots,chkptIO,job%IOj0matel_action) call PTstorempi_icontr_cnu(Neigenroots,fileh_w,job%IOj0matel_action) ! if (job%vib_rot_contr) then - !write(chkptIO) 'vib-rot' call MPI_File_write(fileh_w, 'vib-rot', 7, mpi_character, mpi_status_ignore, ierr) endif else From aa47a1bd07ecdf03426ca06bad2511ccaa3812e4 Mon Sep 17 00:00:00 2001 From: Arjen Tamerus Date: Mon, 27 Apr 2020 15:23:32 +0100 Subject: [PATCH 27/79] Refactored MPI-IO to be more robust across implementations --- mpi_aux.f90 | 61 ++++++++++++++++++------------- perturbation.f90 | 95 ++++++++++++++++++++++++++++++------------------ tran.f90 | 38 +++++++++++-------- 3 files changed, 119 insertions(+), 75 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index b6cda2e..457ec8d 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -5,7 +5,7 @@ module mpi_aux implicit none public co_init_comms, co_finalize_comms, co_init_distr, co_distr_data, co_write_matrix_distr, co_read_matrix_distr - public co_block_type_init + public co_block_type_init, co_read_matrix_distr_ordered public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim @@ -18,13 +18,13 @@ module mpi_aux module procedure :: co_sum_double end interface - integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv - integer :: comm_size, mpi_rank - integer :: co_startdim, co_enddim - logical :: comms_inited = .false., distr_inited=.false. - type(MPI_Datatype) :: mpitype_column - type(MPI_Datatype),dimension(:), allocatable :: mpi_blocktype - integer :: mpi_real_size, mpi_int_size + integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv + integer :: comm_size, mpi_rank + integer :: co_startdim, co_enddim + logical :: comms_inited = .false., distr_inited=.false. + type(MPI_Datatype) :: mpitype_column + type(MPI_Datatype),dimension(:), allocatable :: mpi_blocktype + integer :: mpi_real_size, mpi_int_size !blacs/pblas integer :: blacs_size, blacs_rank, blacs_ctxt @@ -153,7 +153,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) integer,intent(out) :: startdim, enddim, blocksize integer,dimension(:),allocatable :: starts, ends integer :: localsize, proc_index, localsize_ - integer :: i, ierr, to_calc + integer :: i, ierr, to_calc, ioslice_width, ioslice_maxwidth if (.not. comms_inited) stop "COMMS NOT INITIALISED" !if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" @@ -245,10 +245,12 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) endif + ioslice_width = enddim-startdim+1 + ioslice_maxwidth = (int(1+real(dimen/comm_size))) if (comm_size .eq. 1) then call co_create_type_column(dimen,dimen,dimen) else - call co_create_type_column(dimen,comm_size*(int(1+real(dimen/comm_size))),enddim-startdim+1) + call co_create_type_column(dimen, comm_size*ioslice_maxwidth, ioslice_width) endif deallocate(starts,ends) @@ -305,6 +307,26 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) end subroutine co_distr_data + subroutine co_read_matrix_distr_ordered(x, longdim, lb, ub, infile) + + real(rk),dimension(:,lb:),intent(out) :: x + integer,intent(in) :: longdim, lb, ub + + type(MPI_File),intent(inout) :: infile + type(MPI_Status) :: writestat + integer(kind=MPI_Offset_kind) :: offset_start,offset_end + integer :: readcount, ierr + + call mpi_barrier(mpi_comm_world, ierr) + + call TimerStart('MPI_read_matrix') + + call MPI_File_read_ordered(infile,x,1,mpitype_column,writestat,ierr) + + call TimerStop('MPI_read_matrix') + + end subroutine co_read_matrix_distr_ordered + subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) real(rk),dimension(:,lb:),intent(out) :: x @@ -327,7 +349,7 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) call MPI_File_seek(infile, offset_end, MPI_SEEK_CUR) else - call MPI_File_read(infile,x,1,mpitype_column,writestat,ierr) + call MPI_File_read_all(infile,x,1,mpitype_column,writestat,ierr) endif call TimerStop('MPI_read_matrix') @@ -339,7 +361,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub type(MPI_File),intent(inout) :: outfile - integer :: ierr + integer :: writecount, ierr integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat @@ -347,18 +369,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call TimerStart('MPI_write_matrix') - if (comm_size.gt.1) then - offset_start = (lb-1)*int(longdim,MPI_OFFSET_KIND)*mpi_real_size - offset_end = 0 - call mpi_barrier(mpi_comm_world, ierr) - - call MPI_File_seek(outfile, offset_start, MPI_SEEK_END) - call MPI_File_write_all(outfile,x,1,mpitype_column,writestat,ierr) - call mpi_barrier(mpi_comm_world, ierr) - call MPI_File_seek(outfile, offset_end, MPI_SEEK_END) - else - call MPI_File_write(outfile,x,1,mpitype_column,writestat,ierr) - endif + call MPI_File_write_ordered(outfile,x,1,mpitype_column,writestat,ierr) call TimerStop('MPI_write_matrix') @@ -382,7 +393,7 @@ subroutine co_create_type_subarray(extent, coldim, rowdim, blockid, mpi_newtype) array_of_sizes(1) = comm_size * extent!coldim array_of_sizes(2) = extent array_of_subsizes(:) = extent - array_of_starts(1) = (blockid - 1) * extent + 0 + array_of_starts(1) = (blockid - 1) * extent array_of_starts(2) = 0 diff --git a/perturbation.f90 b/perturbation.f90 index a218982..c1d5a5f 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7740,7 +7740,7 @@ subroutine open_chkptfile_mpi(fileh, filename, mode) amode = mpi_mode_wronly+mpi_mode_create end select - call MPI_File_Open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) + call MPI_File_open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) if (ierr.gt.0) then if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file.", filename stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error opening MATELEM MPI-IO input file" @@ -7878,7 +7878,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & end if file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size - call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) + call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) call MPI_File_read_all(fileh, readbuf, 11, mpi_character, mpi_status_ignore, ierr) if (readbuf(1:11)/='icontr_ideg') then @@ -7888,7 +7888,9 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & end if file_offset = (PT%Nclasses+1)*int(ncontr,MPI_OFFSET_KIND)*mpi_int_size - call mpi_file_seek(fileh, file_offset, MPI_SEEK_CUR) + call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) + file_offset = file_offset+file_offset+mpi_int_size+46 + call MPI_File_seek_shared(fileh, file_offset, MPI_SEEK_SET) !deallocate(imat_t) @@ -8139,6 +8141,8 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & call ArrayStart('hvib-matrix',ierr,1,kind(f_t),localrootsize) ! call co_read_matrix_distr(hvib%me, ncontr, co_startdim, co_enddim, fileh) + !file_offset = int(ncontr,MPI_OFFSET_KIND)*ncontr*mpi_real_size + !call MPI_File_seek(fileh, file_offset, MPI_SEEK_CUR) !call MPI_File_read_all(fileh, mat_(1:ncontr,1:ncontr), ncontr*ncontr, mpi_double_precision, mpi_status_ignore, ierr) ! !hvib%me(1:ncontr,1:ncontr) = mat_(1:ncontr,1:ncontr) @@ -15270,7 +15274,7 @@ subroutine PTcontracted_matelem_class(jrot) integer(ik) :: iclasses,ilevel,ideg,alloc,dimen,iterm,k1,k2,islice real(rk),allocatable :: me_t(:,:) real(rk),allocatable :: mat_t(:,:), grot_t(:,:),extF_t(:,:),gvib_t(:,:),hvib_t(:,:),fvib_t(:,:),& - matclass(:,:,:),hrot_t(:,:),gcor_t(:,:) + matclass(:,:,:),hrot_t(:,:),gcor_t(:,:),extF_test(:,:) real(rk),allocatable :: gcor_(:,:,:,:),grot_(:,:,:,:),extF_dvr(:,:,:),extF_r(:,:) real(rk),allocatable :: recvbuf(:,:,:) ! @@ -15375,6 +15379,7 @@ subroutine PTcontracted_matelem_class(jrot) if (comm_size .gt. 1) then mdimen_p = int(1+real(mdimen/comm_size)) mdimen_b = comm_size*mdimen_p + allocate(recvbuf(mdimen_p,mdimen_p,comm_size),stat=alloc) else mdimen_p = mdimen mdimen_b = mdimen @@ -15419,9 +15424,9 @@ subroutine PTcontracted_matelem_class(jrot) call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_CUR, ierr) + if (mpi_rank.eq.0) then !AT call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) call MPI_File_write(chkptMPIIO,'Start Kinetic part',18,mpi_character,mpi_status_ignore,ierr) - if (mpi_rank.eq.0) then !AT call TimerStart('mpiiosingle') !AT ! ! store the bookkeeping information about the contr. basis set @@ -15431,7 +15436,7 @@ subroutine PTcontracted_matelem_class(jrot) call TimerStop('mpiiosingle') !AT endif call MPI_Barrier(mpi_comm_world, ierr) - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END, ierr) + call MPI_File_seek_shared(chkptMPIIO, mpioffset, MPI_SEEK_END, ierr) else call IOStart(trim(job_is),chkptIO) ! @@ -15609,7 +15614,6 @@ subroutine PTcontracted_matelem_class(jrot) endif ! ! - allocate(recvbuf(mdimen_p,mdimen_p,comm_size),stat=alloc) allocate(grot_t(mdimen_b,startdim:startdim+mdimen_p-1),hrot_t(mdimen_b,startdim:startdim+mdimen_p-1), & gcor_t(mdimen_b,startdim:startdim+mdimen_p-1),stat=alloc) call ArrayStart('grot-gcor-fields',alloc,1,kind(f_t),rootsize) @@ -15622,8 +15626,10 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write_shared(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) + !else + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif else write(chkptIO) 'g_rot' @@ -15707,8 +15713,11 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then + ! call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) if(mpi_rank.eq.0) then - call MPI_File_write(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write_shared(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) + !else + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif else write(chkptIO) 'g_cor' @@ -16093,7 +16102,7 @@ subroutine PTcontracted_matelem_class(jrot) write(*,*) "TODO: NEEDS VERIFICATION" call divided_slice_open_mpi(islice,chkptMPIIO_,'g_vib',job%matelem_suffix) ! - call co_read_matrix_distr(gvib_t, mdimen, startdim, enddim, chkptMPIIO_) + call co_read_matrix_distr_ordered(gvib_t, mdimen, startdim, enddim, chkptMPIIO_) ! do b=1,comm_size if (send_or_recv(b).ge.0) then @@ -16176,8 +16185,10 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) + !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) + call MPI_File_write_shared(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) + !else + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) else @@ -16197,9 +16208,9 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then call mpi_barrier(mpi_comm_world, ierr) + call MPI_File_seek_shared(chkptMPIIO, mpioffset, MPI_SEEK_END) if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'End Kinetic part',16,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write_shared(chkptMPIIO,'End Kinetic part',16,mpi_character,mpi_status_ignore,ierr) endif call MPI_File_close(chkptMPIIO, ierr) else @@ -16238,9 +16249,9 @@ subroutine PTcontracted_matelem_class(jrot) if (mpi_rank.eq.0) then !AT call TimerStart('mpiiosingle') !AT - call MPI_File_write(chkptMPIIO,'[MPIIO]',7,mpi_character,mpi_status_ignore,ierr) - call MPI_File_write(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) - call MPI_File_write(chkptMPIIO, PT%Maxcontracts, 1, mpi_integer, mpi_status_ignore, ierr) + call MPI_File_write_shared(chkptMPIIO,'[MPIIO]Start external field',27,mpi_character,mpi_status_ignore,ierr) + !call MPI_File_write_shared(chkptMPIIO,'Start external field',20,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write_shared(chkptMPIIO, PT%Maxcontracts, 1, mpi_integer, mpi_status_ignore, ierr) ! ! store the bookkeeping information about the contr. basis set ! @@ -16248,6 +16259,13 @@ subroutine PTcontracted_matelem_class(jrot) call TimerStop('mpiiosingle') !AT ! + !else + ! call TimerStart('mpiiosingle') !AT + ! ! + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) + ! ! + ! call TimerStop('mpiiosingle') !AT endif else call IOStart(trim(job_is),chkptIO) @@ -16316,10 +16334,9 @@ subroutine PTcontracted_matelem_class(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) then - call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) + call MPI_File_write_shared(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) endif - call mpi_barrier(mpi_comm_world,ierr) + !call mpi_barrier(mpi_comm_world,ierr) ! call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) else @@ -16374,7 +16391,9 @@ subroutine PTcontracted_matelem_class(jrot) if (mpi_rank.eq.0) then !AT if(.not.job%IOextF_divide) then !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) - call MPI_File_write(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) + call MPI_File_write_shared(chkptMPIIO,'End external field',18,mpi_character,mpi_status_ignore,ierr) + !else + ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif endif call MPI_File_close(chkptMPIIO, ierr) @@ -16527,12 +16546,12 @@ subroutine write_divided_slice_mpi(islice,name,suffix,N,field) call MPI_File_set_size(chkptMPIIO, offset, ierr) call mpi_barrier(mpi_comm_world, ierr) ! - if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) + if(mpi_rank .eq. 0) call MPI_File_write_shared(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) call mpi_barrier(mpi_comm_world, ierr) ! call co_write_matrix_distr(field, N, co_startdim, co_enddim,chkptMPIIO) ! - if(mpi_rank .eq. 0) call MPI_File_write(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) + if(mpi_rank .eq. 0) call MPI_File_write_shared(chkptMPIIO,name,len(trim(name)),mpi_character,mpi_status_ignore,ierr) ! call MPI_File_close(chkptMPIIO, ierr) ! @@ -16606,11 +16625,14 @@ subroutine divided_slice_open_mpi(islice,chkptIO,name,suffix) ! ilen = LEN_TRIM(name) ! - call MPI_File_read_all(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) - if ( trim(buf(1:ilen))/=trim(name) ) then - if(mpi_rank.eq.0) write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,buf(1:ilen) - stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' - end if + if (mpi_rank.eq.0) then + call MPI_File_read_shared(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) + if ( trim(buf(1:ilen))/=trim(name) ) then + write (out,"(' kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,buf(1:ilen) + call MPI_Abort(mpi_comm_world, 1) + !stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' + endif + endif end subroutine divided_slice_open_mpi ! subroutine divided_slice_close(islice,chkptIO,name) @@ -16650,11 +16672,14 @@ subroutine divided_slice_close_mpi(islice,chkptIO,name) ! ilen = LEN_TRIM(name) ! - call MPI_File_read_all(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) - if ( trim(buf(1:ilen))/=trim(name) ) then - if(mpi_rank .eq. 0) write (out,"(' divided_slice_close, kinetic checkpoint slice ',a,': footer is missing or wrong',a)") trim(name),buf(1:ilen) - stop 'divided_slice_close - in slice - footer missing or wrong' - end if + if(mpi_rank .eq. 0) then + call MPI_File_read_shared(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) + if ( trim(buf(1:ilen))/=trim(name) ) then + write (out,"(' divided_slice_close, kinetic checkpoint slice ',a,': footer is missing or wrong',a)") trim(name),buf(1:ilen) + call MPI_Abort(mpi_comm_world, 1) + !stop 'divided_slice_close - in slice - footer missing or wrong' + endif + endif ! call MPI_File_close(chkptIO, ierr) ! @@ -34015,7 +34040,7 @@ subroutine PTstore_icontr_cnu(Maxcontracts,iunit,dir) ! end subroutine PTstore_icontr_cnu - subroutine PTstorempi_icontr_cnu(maxcontracts,iunit,dir) + subroutine PTstoreMPI_icontr_cnu(maxcontracts,iunit,dir) use mpi_aux integer(ik),intent(in) :: maxcontracts diff --git a/tran.f90 b/tran.f90 index c612366..2e4694a 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1587,9 +1587,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) task = 'rot' ! if (trim(job%kinetmat_format).eq.'MPIIO') then - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) + call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + if(mpi_rank.eq.0) call MPI_File_write_shared(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) call mpi_barrier(MPI_COMM_WORLD, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) ! call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) else @@ -1620,7 +1620,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) - call MPI_File_get_position(fileh_w, write_offset, ierr) + !call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND),MPI_SEEK_END) + call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) endif ! @@ -1700,8 +1701,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) write_offset = write_offset + 9*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size + !call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) - call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_SET) + !write_offset = 0 + !call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) endif ! @@ -1717,9 +1721,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! - if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) + if(mpi_rank.eq.0) call MPI_File_write_shared(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) call MPI_Barrier(MPI_COMM_WORLD, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + !call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) else call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) ! @@ -1738,7 +1742,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) - call MPI_File_get_position(fileh_w, write_offset, ierr) + call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) endif ! @@ -1820,15 +1824,17 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) write_offset = write_offset + 3*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size + !call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) - call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) + !write_offset = 0 endif ! if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! if ((.not.job%IOmatelem_split.or.job%iswap(1)==1).and.(mpi_rank.eq.0)) then if (trim(job%kinetmat_format).eq.'MPIIO') then - call MPI_File_write(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) + call MPI_File_write_shared(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) else write(chkptIO) 'End Kinetic part' endif @@ -1950,8 +1956,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) call mpi_file_set_size(fileh_w, mpioffset, ierr) ! if(mpi_rank.eq.0) then - call mpi_file_write(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) - call mpi_file_write(fileh_w, 'start external field', 20, mpi_character, mpi_status_ignore, ierr) + call mpi_file_write_shared(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) + call mpi_file_write_shared(fileh_w, 'start external field', 20, mpi_character, mpi_status_ignore, ierr) endif ! ! store the matrix elements @@ -1991,7 +1997,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, extF_block_type, "native", MPI_INFO_NULL, ierr) - call MPI_File_get_position(fileh_w, write_offset, ierr) + call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) endif ! @@ -2068,8 +2074,8 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (trim(job%kinetmat_format).eq.'MPIIO') then if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) call MPI_Barrier(mpi_comm_world, ierr) - call MPI_File_seek(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) - call MPI_File_write(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) + call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) + call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) else write(chkptIO) imu write(chkptIO) mat_s @@ -2095,7 +2101,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) write_offset = write_offset + 3*int(Neigenroots,MPI_OFFSET_KIND)*Neigenroots*mpi_real_size call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) - call MPI_File_seek(fileh_w, write_offset, MPI_SEEK_SET) + call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_SET) + !write_offset = 0 + !call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) endif ! if (allocated(extF_me)) deallocate(extF_me) From b9060cdeedd9b05bbed70518d861469b25c19f70 Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Fri, 16 Apr 2021 11:03:39 +0100 Subject: [PATCH 28/79] Pretend we're always using MPI - blacs_ctxt not used elsewhere, no need to be public --- mpi_aux.f90 | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 457ec8d..76ed501 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -1,5 +1,7 @@ module mpi_aux + #ifdef TROVE_USE_MPI_ use mpi_f08 + #endif use timer use accuracy implicit none @@ -10,7 +12,7 @@ module mpi_aux public send_or_recv, comm_size, mpi_rank public co_startdim, co_enddim - public blacs_size, blacs_rank, blacs_ctxt + public blacs_size, blacs_rank public nprow,npcol,myprow,mypcol public mpi_real_size, mpi_int_size @@ -38,6 +40,7 @@ subroutine co_init_blacs() if (.not. comms_inited) stop "CO_INIT_BLACS COMMS NOT INITED" +#ifdef TROVE_USE_MPI_ ! Must be initialised to zero - if stack contains garbage here MPI_Dims_create WILL fail blacs_dims = 0 @@ -51,6 +54,14 @@ subroutine co_init_blacs() call blacs_gridinfo(blacs_ctxt, nprow, npcol, myprow, mypcol) !write(*,"('BLACS: [',i2,',',i2'](',i4,i4,i4,i4',)')") mpi_rank,blacs_rank,nprow,npcol,myprow,mypcol +#else + blacs_size = 1 + blacs_rank = 0 + nprow = 1 + npcol = 1 + myprow = 1 + mypcol = 1 +#endif end subroutine co_init_blacs subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) @@ -97,6 +108,7 @@ subroutine co_sum_double(x, root_process) integer, optional :: root_process if (comm_size.eq.1) return +#ifdef TROVE_USE_MPI_ call TimerStart('co_sum_double') if (present(root_process)) then @@ -111,10 +123,12 @@ subroutine co_sum_double(x, root_process) end if call TimerStop('co_sum_double') +#endif end subroutine subroutine co_init_comms() +#ifdef TROVE_USE_MPI_ integer :: ierr call mpi_init(ierr) @@ -130,11 +144,17 @@ subroutine co_init_comms() if (mpi_rank.ne.0) then open(newunit=out, file='/dev/null', status='replace', iostat=ierr, action="write") endif - +#else + comm_size = 1 + mpi_rank = 0 +#endif comms_inited = .true. call co_init_blacs() + + + end subroutine co_init_comms subroutine co_finalize_comms() @@ -142,15 +162,20 @@ subroutine co_finalize_comms() if (.not. comms_inited) stop "CO_FINALIZE_COMMS COMMS NOT INITED" +#ifdef TROVE_USE_MPI_ call mpi_finalize(ierr) if (ierr .gt. 0) stop "MPI_FINALIZE" +#endif end subroutine co_finalize_comms + subroutine co_init_distr(dimen, startdim, enddim, blocksize) integer,intent(in) :: dimen integer,intent(out) :: startdim, enddim, blocksize + +#ifdef TROVE_USE_MPI_ integer,dimension(:),allocatable :: starts, ends integer :: localsize, proc_index, localsize_ integer :: i, ierr, to_calc, ioslice_width, ioslice_maxwidth @@ -256,14 +281,28 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) deallocate(starts,ends) distr_inited = .true. +#else + startdim = 1 + enddim = dimen + co_startdim = 1 + co_enddim = dimen + blocksize = dimen*dimen + send_or_recv(1) = 0 +#endif + end subroutine co_init_distr + ! + ! Distribute the contents of an array among processes. + ! If only using one process or not using MPI, do nothing. + ! subroutine co_distr_data(x, tmp, blocksize, lb, ub) real(rk),dimension(:,lb:),intent(inout) :: x real(rk),dimension(:,:,:),intent(inout) :: tmp integer,intent(in) :: blocksize, lb, ub +#ifdef TROVE_USE_MPI_ integer :: i, icoeff, jcoeff, offset, ierr, k type(MPI_Request) :: reqs(comm_size) @@ -304,6 +343,7 @@ subroutine co_distr_data(x, tmp, blocksize, lb, ub) enddo call TimerStop('MPI_transpose_local') call TimerStop('MPI_transpose') +#endif end subroutine co_distr_data @@ -375,6 +415,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) end subroutine co_write_matrix_distr +#ifdef TROVE_USE_MPI_ subroutine co_create_type_column(extent, blocksize, ncols) integer, intent(in) :: extent, blocksize, ncols integer :: ierr,writecount @@ -401,5 +442,6 @@ subroutine co_create_type_subarray(extent, coldim, rowdim, blockid, mpi_newtype) call MPI_Type_commit(mpi_newtype, ierr) end subroutine co_create_type_subarray +#endif end module From 7e8b53a8a348832b53fe895fdbcf1ad672a536f9 Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Fri, 16 Apr 2021 19:21:44 +0100 Subject: [PATCH 29/79] Use dummy MPI type in co_block_type_init --- mpi_aux.f90 | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 76ed501..407f815 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -16,10 +16,25 @@ module mpi_aux public nprow,npcol,myprow,mypcol public mpi_real_size, mpi_int_size +#ifndef TROVE_USE_MPI_ + public MPI_Datatype +#endif + interface co_sum module procedure :: co_sum_double end interface +#ifndef TROVE_USE_MPI_ + ! + ! When not using MPI, create a dummy type to maintain the interface of this module, + ! specifically the last argument of co_block_type_init. + ! The value passed will not be accessed. + ! + type MPI_Datatype + integer :: dummy = 0 + end type MPI_Datatype +#endif + integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv integer :: comm_size, mpi_rank integer :: co_startdim, co_enddim @@ -69,12 +84,14 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) real(rk),intent(out),dimension(:,:),allocatable :: smat + ! Note: allocated matrix will have total dimensions (dimy x dimx) integer,intent(in) :: dimx, dimy integer,intent(out),dimension(9) :: descr integer,intent(out) :: allocinfo type(MPI_Datatype),intent(out),optional :: mpi_type +#ifdef TROVE_USE_MPI_ integer,dimension(2) :: global_size, distr, dargs integer :: MB,NB,MLOC,NLOC,ierr @@ -99,6 +116,10 @@ subroutine co_block_type_init(smat, dimx, dimy, descr, allocinfo, mpi_type) MPI_ORDER_FORTRAN, MPI_DOUBLE_PRECISION, mpi_type, ierr) call MPI_Type_commit(mpi_type, ierr) endif +#else + allocate(smat(dimy,dimx), stat=allocinfo) + if(allocinfo.ne.0) return +#endif end subroutine co_block_type_init From c5a07b283c080a50ff4fc20aa72a292582a63b4f Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Fri, 16 Apr 2021 19:24:14 +0100 Subject: [PATCH 30/79] Use correct BLACS process numbering --- mpi_aux.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 407f815..f202a69 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -74,8 +74,8 @@ subroutine co_init_blacs() blacs_rank = 0 nprow = 1 npcol = 1 - myprow = 1 - mypcol = 1 + myprow = 0 + mypcol = 0 #endif end subroutine co_init_blacs From d4c917b5fb015b23b994631b1f052c5a670684f0 Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Fri, 16 Apr 2021 19:40:10 +0100 Subject: [PATCH 31/79] Add back some forgotten steps when not using MPI --- mpi_aux.f90 | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index f202a69..aa5b508 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -195,11 +195,12 @@ end subroutine co_finalize_comms subroutine co_init_distr(dimen, startdim, enddim, blocksize) integer,intent(in) :: dimen integer,intent(out) :: startdim, enddim, blocksize + integer :: ierr #ifdef TROVE_USE_MPI_ integer,dimension(:),allocatable :: starts, ends integer :: localsize, proc_index, localsize_ - integer :: i, ierr, to_calc, ioslice_width, ioslice_maxwidth + integer :: i, to_calc, ioslice_width, ioslice_maxwidth if (.not. comms_inited) stop "COMMS NOT INITIALISED" !if (distr_inited) stop "DISTRIBUTION ALREADY INITIALISED" @@ -301,8 +302,12 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) deallocate(starts,ends) - distr_inited = .true. #else + if (.not. comms_inited) stop "COMMS NOT INITIALISED" + if (.not. distr_inited) then + allocate(send_or_recv(1),stat=ierr) + if (ierr .gt. 0) stop "CO_INIT_DISTR ALLOCATION FAILED" + endif startdim = 1 enddim = dimen co_startdim = 1 @@ -311,6 +316,7 @@ subroutine co_init_distr(dimen, startdim, enddim, blocksize) send_or_recv(1) = 0 #endif + distr_inited = .true. end subroutine co_init_distr ! From c9935c8567cf9ab40739db689deda75c71d9ee43 Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Tue, 20 Apr 2021 08:15:12 +0100 Subject: [PATCH 32/79] Fix indentation --- mpi_aux.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index aa5b508..252a80f 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -1,7 +1,7 @@ module mpi_aux - #ifdef TROVE_USE_MPI_ +#ifdef TROVE_USE_MPI_ use mpi_f08 - #endif +#endif use timer use accuracy implicit none From e9c04a16270bb4e8f015b440ebface326b178925 Mon Sep 17 00:00:00 2001 From: Anastasis Georgoulas Date: Tue, 20 Apr 2021 09:12:37 +0100 Subject: [PATCH 33/79] Compile with MPI support --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index c7bb7f3..58e9427 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ pot_user = pot_ch4 PLAT = #PLAT = _2205_i17 FOR = mpif90 -FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback +FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback -DTROVE_USE_MPI_ #FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 From b4f9c86feb690997d03b938a6c4e4605da6d6aac Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 29 Apr 2021 10:57:03 +0100 Subject: [PATCH 34/79] Fix compilation with gfortran --- makefile | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/makefile b/makefile index 58e9427..710f203 100644 --- a/makefile +++ b/makefile @@ -13,17 +13,15 @@ pot_user = pot_ch4 PLAT = #PLAT = _2205_i17 FOR = mpif90 -FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback -DTROVE_USE_MPI_ -#FFLAGS = -fopenmp -ffree-line-length-none -march=native -O3 -fcray-pointer -g3 +#FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback -DTROVE_USE_MPI_ +FFLAGS = -fopenmp -ffree-line-length-512 -march=native -O0 -fcray-pointer -g -fallow-argument-mismatch -fbacktrace -DTROVE_USE_MPI_ +#LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 +LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_blacs_intelmpi_lp64 -lmkl_scalapack_lp64 -lmkl_core -lgomp -lpthread -lm -ldl -#ARPACK = ~/libraries/ARPACK/libarpack_omp_64.a +ARPACK = -larpack -LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -#LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl - - -LIB = $(LAPACK) +LIB = $(LAPACK) $(ARPACK) %.o : %.f90 $(FOR) -cpp -c $(FFLAGS) $< From 686a46e2d1a726177b0570cc2bdc5eb25a01c304 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 29 Apr 2021 14:13:27 +0100 Subject: [PATCH 35/79] Remove unused function --- lapack.f90 | 427 ----------------------------------------------- perturbation.f90 | 3 - 2 files changed, 430 deletions(-) diff --git a/lapack.f90 b/lapack.f90 index b52b0e0..7aad724 100644 --- a/lapack.f90 +++ b/lapack.f90 @@ -1657,433 +1657,6 @@ end subroutine matvec_p - subroutine dseupd_p_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) - - integer,intent(in) :: n,bterm(n,2),maxitr_ - integer,intent(inout) :: nroots - double precision,intent(in) :: factor - double precision,intent(in) :: tol - double precision,intent(inout) :: h(:,:) - double precision,intent(out) :: e(nroots) - - double precision,allocatable :: v(:,:),workl(:),workd(:),d(:,:),resid(:) - ! - logical,allocatable :: select(:) - integer(ik) :: iparam(11), ipntr(11),ldv,iter - ! - integer,parameter :: maxnprocs=256 - ! - integer(ik) :: kstart(0:maxnprocs),iproc,dx,i,jproc - ! - character(len=1) :: bmat - character(len=2) :: which - character(len=cl) :: blacs_or_mpi = 'NONE' - ! - integer(ik) :: ido, nev, ncv, lworkl, info, ierr, j, & - mode, ishfts, alloc, maxitr - ! - logical rvec - double precision :: sigma - - ! - !double precision,external :: dnrm2 - ! - -! -!----------------------------------------------------------------------- -! - integer logfil, ndigit, mgetv0,& - msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd,& - mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd,& - mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd - common /debug/ & - logfil, ndigit, mgetv0,& - msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd,& - mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd,& - mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd - - real t0, t1, t2, t3, t4, t5 - save t0, t1, t2, t3, t4, t5 -! - integer nopx, nbx, nrorth, nitref, nrstrt - real tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv,& - tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv,& - tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv,& - tmvopx, tmvbx, tgetv0, titref, trvec - common /timing/ & - nopx, nbx, nrorth, nitref, nrstrt,& - tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv,& - tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv,& - tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv,& - tmvopx, tmvbx, tgetv0, titref, trvec - - - -! %-----------------% -! | BLACS INTERFACE | -! %-----------------% -! - integer comm, iam, nprocs, nloc, & - nprow, npcol, myprow, mypcol -! - !external BLACS_PINFO, BLACS_SETUP, BLACS_GET, & - ! BLACS_GRIDINIT, BLACS_GRIDINFO - integer,parameter :: MPI_COMM_WORLD=0 -! - -! %---------------% -! | MPI INTERFACE | -! %---------------% - - integer myid, rc - - -! -! %----------------------------------------------% -! | Local Buffers needed for BLACS communication | -! %----------------------------------------------% -! - Double precision,allocatable :: mv_buf(:) -! -! %------------% -! | Parameters | -! %------------% -! - Double precision zero - parameter (zero = 0.0) -! -! %-----------------------------% -! | BLAS & LAPACK routines used | -! %-----------------------------% -! - Double precision pdnorm2 - external pdnorm2, daxpy -! -! %---------------------% -! | Intrinsic Functions | -! %---------------------% -! - intrinsic abs -! -! %-----------------------% -! | Executable Statements | -! %-----------------------% -! - -#if (blacs_ > 0) - call BLACS_PINFO( iam, nprocs ) - blacs_or_mpi = 'BLACS' -#endif - -#if (mpi_ > 0) - call MPI_INIT( ierr ) - comm = MPI_COMM_WORLD - call MPI_COMM_RANK( comm, myid, ierr ) - call MPI_COMM_SIZE( comm, nprocs, ierr ) - ! - print *,comm,myid,nprocs - ! - if (trim(blacs_or_mpi)=='BLACS') then - write(out,"('dseupd_p_arpack: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") - stop 'dseupd_p_arpack: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' - endif - ! - blacs_or_mpi = 'MPI' - ! - ! -#endif - - -! -! If in PVM, create virtual machine if it doesn't exist -! - if (nprocs .lt. 1) then - nprocs = 1 -#if (blacs_ > 0) - call BLACS_SETUP( iam, nprocs ) -#endif - endif - if (nprocs >maxnprocs) stop 'nprocs > maxnprocs' - ! - ! Set up processors in 1D Grid - ! - nprow = nprocs - npcol = 1 - ! - do iproc = 0,nprocs - ! - kstart(iproc) = iproc*(n/nprocs+1)+1 - if (iproc>=mod(n,nprocs)) kstart(iproc) = iproc*(n/nprocs)+1+mod(n,nprocs) - print *,iproc,kstart(iproc) - ! - enddo - ! - if (verbose>=2) call TimerStart('dseupd_p_arpack: diagonalization') - ! - nev = nroots - ! - if (nroots==0) nev=max(100,n) - ! - ncv = factor*nev - ! - if (ncv==nev) ncv = int(21*nev/10)+1 - ! - ncv = min(n,ncv) - ! - bmat = 'I' - which = 'SM' - ! - ! The work array WORKL is used in DSAUPD as workspace. - ! The parameter TOL determines the stopping criterion. - ! If TOL<=0, machine precision is used. The variable IDO is used for - ! reverse communication and is initially set to 0. - ! Setting INFO=0 indicates that a random vector is generated in DSAUPD - ! to start the Arnoldi iteration. - ! - - lworkl = ncv*(ncv+10) - ldv = n - ! - info = 0 - ido = 0 - ! - allocate(v(ldv,ncv), workl(lworkl),workd(3*n),d(ncv,2),resid(n),select(ncv),mv_buf(n),stat=alloc) - call ArrayStart('dseupd_p_arpack',alloc,size(v),kind(v)) - call ArrayStart('dseupd_p_arpack',alloc,size(workl),kind(workl)) - call ArrayStart('dseupd_p_arpack',alloc,size(workd),kind(workd)) - call ArrayStart('dseupd_p_arpack',alloc,size(d),kind(d)) - call ArrayStart('dseupd_p_arpack',alloc,size(resid),kind(resid)) - call ArrayStart('dseupd_p_arpack',alloc,size(select),kind(select)) - call ArrayStart('dseupd_p_arpack',alloc,size(mv_buf),kind(mv_buf)) - - ! - ! Estimate block overlapping: - ! - dx = 0 - ! - do iproc = 0,nprocs-1 - ! - do i = kstart(iproc),kstart(iproc+1)-1 - ! - do jproc = 0,iproc-1 - ! - if (bterm(i,1)kstart(jproc)) dx = max(dx,jproc-iproc) - ! - enddo - ! - enddo - ! - enddo - ! -! -! Get default system context, and define grid -! - ! - myprow = 1 ; mypcol = 1 ; myid = 1 -#if (blacs_ > 0) - call BLACS_GET( 0, 0, comm ) - call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) - call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) -#endif - ! - if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') write(out,"('myprow, nprow, mypcol, npcol, nprocs = ',5i8)") & - myprow, nprow, mypcol, npcol, nprocs - if (verbose>=2.and.trim(blacs_or_mpi)=='MPI') write(out,"('myid,nproc = ',2i8)") myid,nprocs -! ! -! If I'm not in grid, go to end of program -! - if ( trim(blacs_or_mpi)=='BLACS'.and.(myprow .ge. nprow) .or. (mypcol .ge. npcol) ) then - write(out,"('not in grid, myprow, nprow, mypcol, npcol = ',4i8)") myprow, nprow, mypcol, npcol - return - endif -! - ndigit = -3 - logfil = 6 - msaupd = 1 - - - !--------------------------------------! - ! Set up distribution of data to nodes ! - !--------------------------------------! - - nloc = (n/nprocs) - if ( mod(n,nprocs)>myprow ) nloc = nloc + 1 - ! - ! - !allocate(mv_buf(n),stat=alloc) - !call ArrayStart('dseupd_p_arpack',alloc,size(mv_buf),kind(mv_buf)) - ! - iparam = 0 - ipntr = 0 - ! - ! This routone uses exact shifts with respect to - ! the current Hessenberg matrix (IPARAM(1) = 1). - ! IPARAM(3) specifies the maximum number of Arnoldi - ! iterations allowed. Mode 1 of DSAUPD is used - ! (IPARAM(7) = 1). - ! - ishfts = 1 - maxitr = maxitr_*nev - mode = 1 - ! - iparam(1) = ishfts - iparam(3) = maxitr - iparam(7) = mode - ! - ! - ! M A I N L O O P (Reverse communication) - ! - !if (verbose>=4) write(out,"(/'Arpack: eigenvalues computed:')") - ! - if (verbose>=3) write(out,"(/'Arpack: n = ',i8,' nev = ',i8,' maxitr = ',i8,' tol = ',e12.5)") n,nev,maxitr,tol - ! - iter = 0 - ! - do while(ido<=1) - ! - iter = iter + 1 - ! - if (verbose>=3.and.mod(iter,10)==0) write(out,"(' iter = ',i8)") iter - ! - ! Repeatedly call the routine DSAUPD and take - ! actions indicated by parameter IDO until - ! either convergence is indicated or maxitr - ! has been exceeded. - ! - -#if (blacs_ > 0 || mpi_ > 0) - ! - call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & - ncv, v, ldv, iparam, ipntr, workd, workl, & - lworkl, info ) - ! -#else - ! - call dsaupd ( ido, bmat, n, which, nev, tol, resid, & - ncv, v, ldv, iparam, ipntr, workd, workl, & - lworkl, info ) - ! -#endif - ! - !if (verbose>=4.and.iparam(5)>0) then - ! ! - ! write(out,"(i8)") iparam(5) - ! ! - !endif - ! - if (ido==-1 .or. ido==1) then - ! - ! Perform matrix vector multiplication - ! y <--- OP*x - ! - call matvec_p(comm,n,nloc,nprocs,kstart(0:nprocs),dx,bterm,h,mv_buf,workd(ipntr(1)),workd(ipntr(2))) - ! - endif - ! - enddo - ! - ! Either we have convergence or there is an error. - ! - if ( info < 0 ) then - write(out,"(/'Error with _saupd, info = ',i8)") info - stop 'Error with _saupd' - endif - ! - ! - ! No fatal errors occurred. Post-Process using DSEUPD. - ! Computed eigenvalues may be extracted. - ! - ! Eigenvectors may also be computed now if desired. (indicated by rvec = .true.) - ! - rvec = .true. - ! -#if (blacs_ > 0) - ! - if (verbose>=5) write(out,"(/'Arpack: dseupd')") - ! - call pdseupd ( comm, rvec, 'All', select, d, v, ldv, sigma, & - bmat, n, which, nev, tol, resid, ncv, v, ldv, & - iparam, ipntr, workd, workl, lworkl, ierr ) - - - if (verbose>=5) write(out,"(/'Arpack: done!')") - - ! -#else - ! - write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") - stop 'Arpack was not activated' - ! -#endif - ! - if ( ierr < 0 ) then - write(out,"(/'Error with_seupd, info = ',i8)") ierr - stop 'Error with _saupd' - endif - ! - nroots = iparam(5) - ! - e(1:nroots) = d(1:nroots,1) - ! - !$omp parallel do private(j) shared(h) schedule(dynamic) - do j=1,n - h(j,1:nroots) = v(j,1:nroots) - enddo - !$omp end parallel do - ! - ! Eigenvalues are returned in the first column - ! of the two dimensional array D and the - ! corresponding eigenvectors are returned in - ! the first NEV columns of the two dimensional - ! array V if requested. Otherwise, an - ! orthogonal basis for the invariant subspace - ! corresponding to the eigenvalues in D is - ! returned in V. - ! - !do j=1, nconv - ! - ! Compute the residual norm || A*x - lambda*x || - ! for the NCONV accurately computed eigenvalues and - ! eigenvectors. (iparam(5) indicates how many are - ! accurate to the requested tolerance) - ! - !call matvec(n,bterm,v(1,j), ax) - ! - !call daxpy(n, -d(j,1), v(1,j), 1, ax, 1) - !d(j,2) = dnrm2(n, ax, 1) - !d(j,2) = d(j,2) / abs(d(j,1)) - ! - !enddo - -! -! %---------------------------% -! | Done with program pdsdrv1.| -! %---------------------------% -! - 9000 continue -! -#if (blacs_ > 0) - call BLACS_GRIDEXIT ( comm ) - call BLACS_EXIT(0) -#endif -#if (mpi_ > 0) - call MPI_FINALIZE(rc) -#endif - - - deallocate(v,workl,workd,d,resid,select,mv_buf) - ! - call ArrayStop('dseupd_p_arpack') - ! - if (verbose>=2) call TimerStop('dseupd_p_arpack: diagonalization') - ! - end subroutine dseupd_p_arpack !subroutine BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) diff --git a/perturbation.f90 b/perturbation.f90 index c1d5a5f..fb5ae4e 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7765,7 +7765,6 @@ subroutine close_chkptfile_mpi(fileh) end subroutine close_chkptfile_mpi subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & - !PT, PTvibrational_me_calc,grot,gcor,hvib, & ncontr, maxcontr, icontr) use mpi_f08 use mpi_aux @@ -10617,8 +10616,6 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_root stop 'diagonalization_contract, mat - too small' endif ! - !call dseupd_p_arpack(dimen_s,bterm,nroots,job%factor,job%maxiter,job%tolerance,mat,energy) - ! !call diag_dseupd_p(dimen_s,bterm,nroots,job%factor,job%maxiter,job%tolerance,mat,energy) ! call dseupd_omp_arpack(dimen_s,bterm,nroots,job%factor,job%maxiter,job%tolerance,mat,energy) From bd97ded50b1727c9534400b0a5977f99bb0c9d20 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 29 Apr 2021 14:19:19 +0100 Subject: [PATCH 36/79] Remove unused function `diag_dseupd_p` --- diag.f90 | 455 +---------------------------------------------- perturbation.f90 | 2 - 2 files changed, 1 insertion(+), 456 deletions(-) diff --git a/diag.f90 b/diag.f90 index 90041cb..a070a1c 100644 --- a/diag.f90 +++ b/diag.f90 @@ -15,7 +15,7 @@ module diag private verbose ! ! - public diag_tridiag,diag_tridiag_pack,diag_dsyev_i8,diag_dgelss,diag_syev_ilp,diag_syev_i8,diag_dseupd,diag_dseupd_p + public diag_tridiag,diag_tridiag_pack,diag_dsyev_i8,diag_dgelss,diag_syev_ilp,diag_syev_i8 public dseupd_omp_arpack,diag_propack,daprod ! integer,parameter:: verbose = 4 @@ -2558,456 +2558,6 @@ end subroutine matvec_p - subroutine diag_dseupd_p(n,bterm,nroots,factor,maxitr_,tol,h,e) - - integer,intent(in) :: n,bterm(n,2),maxitr_ - integer,intent(inout) :: nroots - double precision,intent(in) :: factor - double precision,intent(in) :: tol - double precision,intent(inout) :: h(:,:) - double precision,intent(out) :: e(nroots) - - double precision,allocatable :: v(:,:),workl(:),workd(:),d(:,:),resid(:) - ! - logical,allocatable :: select(:) - integer(ik) :: iparam(11), ipntr(11),ldv,iter - ! - integer,parameter :: maxnprocs=1024 - ! - integer(ik) :: kstart(0:maxnprocs),iproc,dx,i,jproc - ! - character(len=1) :: bmat - character(len=2) :: which - character(len=cl) :: blacs_or_mpi = 'NONE' - ! - integer(ik) :: ido, nev, ncv, lworkl, info, ierr, j, & - mode, ishfts, alloc, maxitr - ! - logical rvec - double precision :: sigma - - ! - !double precision,external :: dnrm2 - ! - -! -!----------------------------------------------------------------------- -! - integer logfil, ndigit, mgetv0,& - msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd,& - mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd,& - mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd - common /debug/ & - logfil, ndigit, mgetv0,& - msaupd, msaup2, msaitr, mseigt, msapps, msgets, mseupd,& - mnaupd, mnaup2, mnaitr, mneigh, mnapps, mngets, mneupd,& - mcaupd, mcaup2, mcaitr, mceigh, mcapps, mcgets, mceupd - - real t0, t1, t2, t3, t4, t5 - save t0, t1, t2, t3, t4, t5 -! - integer nopx, nbx, nrorth, nitref, nrstrt - real tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv,& - tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv,& - tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv,& - tmvopx, tmvbx, tgetv0, titref, trvec - common /timing/ & - nopx, nbx, nrorth, nitref, nrstrt,& - tsaupd, tsaup2, tsaitr, tseigt, tsgets, tsapps, tsconv,& - tnaupd, tnaup2, tnaitr, tneigh, tngets, tnapps, tnconv,& - tcaupd, tcaup2, tcaitr, tceigh, tcgets, tcapps, tcconv,& - tmvopx, tmvbx, tgetv0, titref, trvec - - - -! %-----------------% -! | BLACS INTERFACE | -! %-----------------% -! - integer comm, iam, nprocs, nloc, & - nprow, npcol, myprow, mypcol -! - !external BLACS_PINFO, BLACS_SETUP, BLACS_GET, & - ! BLACS_GRIDINIT, BLACS_GRIDINFO - !integer,parameter :: MPI_COMM_WORLD=0 -! - -! %---------------% -! | MPI INTERFACE | -! %---------------% - - integer myid, rc -! include 'mpif.h' - - -! -! %----------------------------------------------% -! | Local Buffers needed for BLACS communication | -! %----------------------------------------------% -! - Double precision,allocatable :: mv_buf(:) -! -! %------------% -! | Parameters | -! %------------% -! - Double precision zero - parameter (zero = 0.0) -! -! %-----------------------------% -! | BLAS & LAPACK routines used | -! %-----------------------------% -! - Double precision pdnorm2 - external pdnorm2, daxpy -! -! %---------------------% -! | Intrinsic Functions | -! %---------------------% -! - intrinsic abs -! -! %-----------------------% -! | Executable Statements | -! %-----------------------% -! - - write(out,"('Start PARPACK-diagonalization')") - - -#if (blacs_ > 0) - write(out,"('BLAS-PINFO-start')") - call BLACS_PINFO( iam, nprocs ) - print *,nprocs - blacs_or_mpi = 'BLACS' -#endif - ! - write(out,"('BLAS-PINFO-done')") - ! - !call BLACS_PINFO( iam, nprocs ) - !print *,nprocs - -#if (mpi_ > 0) - call MPI_INIT( ierr ) - comm = MPI_COMM_WORLD - call MPI_COMM_RANK( comm, myid, ierr ) - call MPI_COMM_SIZE( comm, nprocs, ierr ) - ! - print *,comm,myid,nprocs - ! - if (trim(blacs_or_mpi)=='BLACS') then - write(out,"('diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME')") - stop 'diag_dseupd_p: IT IS ILLEGAL TO USE MPI AND BLACS AT THE SAME TIME' - endif - ! - blacs_or_mpi = 'MPI' - ! -#endif - ! - - -! -! If in PVM, create virtual machine if it doesn't exist -! - if (nprocs .lt. 1) then - nprocs = 1 -#if (blacs_ > 0) - call BLACS_SETUP( iam, nprocs ) -#endif - ! - print *,nprocs - ! - endif - if (nprocs >maxnprocs) stop 'nprocs > maxnprocs' - ! - ! Set up processors in 1D Grid - ! - nprow = nprocs - npcol = 1 - ! - do iproc = 0,nprocs - ! - kstart(iproc) = iproc*(n/nprocs+1)+1 - if (iproc>=mod(n,nprocs)) kstart(iproc) = iproc*(n/nprocs)+1+mod(n,nprocs) - print *,iproc,kstart(iproc) - ! - enddo - ! - if (verbose>=2) call TimerStart('diag_dseupd_p: diagonalization') - ! - nev = nroots - ! - if (nroots==0) nev=max(100,n) - ! - ncv = factor*nev - ! - if (ncv==nev) ncv = int(21*nev/10)+1 - ! - ncv = min(n,ncv) - ! - bmat = 'I' - which = 'SM' - ! - ! The work array WORKL is used in DSAUPD as workspace. - ! The parameter TOL determines the stopping criterion. - ! If TOL<=0, machine precision is used. The variable IDO is used for - ! reverse communication and is initially set to 0. - ! Setting INFO=0 indicates that a random vector is generated in DSAUPD - ! to start the Arnoldi iteration. - ! - - lworkl = ncv*(ncv+10) - ldv = n - ! - info = 0 - ido = 0 - ! - allocate(v(ldv,ncv), workl(lworkl),workd(3*n),d(ncv,2),resid(n),select(ncv),mv_buf(n),stat=alloc) - call ArrayStart('diag_dseupd_p',alloc,size(v),kind(v)) - call ArrayStart('diag_dseupd_p',alloc,size(workl),kind(workl)) - call ArrayStart('diag_dseupd_p',alloc,size(workd),kind(workd)) - call ArrayStart('diag_dseupd_p',alloc,size(d),kind(d)) - call ArrayStart('diag_dseupd_p',alloc,size(resid),kind(resid)) - call ArrayStart('diag_dseupd_p',alloc,size(select),kind(select)) - call ArrayStart('diag_dseupd_p',alloc,size(mv_buf),kind(mv_buf)) - - ! - ! Estimate block overlapping: - ! - dx = 0 - ! - do iproc = 0,nprocs-1 - ! - do i = kstart(iproc),kstart(iproc+1)-1 - ! - do jproc = 0,iproc-1 - ! - if (bterm(i,1)kstart(jproc)) dx = max(dx,jproc-iproc) - ! - enddo - ! - enddo - ! - enddo - ! -! -! Get default system context, and define grid -! - ! - myprow = 1 ; mypcol = 1 ; myid = 1 - ! -#if (blacs_ > 0) - call BLACS_GET( 0, 0, comm ) - call BLACS_GRIDINIT( comm, 'Row', nprow, npcol ) - call BLACS_GRIDINFO( comm, nprow, npcol, myprow, mypcol ) - ! - write(out,"(' myprow, nprow, mypcol, npcol = ',4i8)") myprow, nprow, mypcol, npcol - ! -#endif - ! - if (verbose>=2.and.trim(blacs_or_mpi)=='BLACS') then - write(out,"('myprow, nprow, mypcol, npcol, nprocs = ',5i8)") myprow, nprow, mypcol, npcol, nprocs - endif - ! - if (verbose>=2.and.trim(blacs_or_mpi)=='MPI') write(out,"('myid,nproc = ',2i8)") myid,nprocs -! ! -! If I'm not in grid, go to end of program -! - if ( trim(blacs_or_mpi)=='BLACS'.and.(myprow .ge. nprow) .or. (mypcol .ge. npcol) ) then - write(out,"('not in grid, myprow, nprow, mypcol, npcol = ',4i8)") myprow, nprow, mypcol, npcol - return - endif -! - ndigit = -3 - logfil = 6 - msaupd = 1 - - - !--------------------------------------! - ! Set up distribution of data to nodes ! - !--------------------------------------! - - nloc = (n/nprocs) - if ( mod(n,nprocs)>myprow ) nloc = nloc + 1 - ! - ! - !allocate(mv_buf(n),stat=alloc) - !call ArrayStart('diag_dseupd_p',alloc,size(mv_buf),kind(mv_buf)) - ! - iparam = 0 - ipntr = 0 - ! - ! This routone uses exact shifts with respect to - ! the current Hessenberg matrix (IPARAM(1) = 1). - ! IPARAM(3) specifies the maximum number of Arnoldi - ! iterations allowed. Mode 1 of DSAUPD is used - ! (IPARAM(7) = 1). - ! - ishfts = 1 - maxitr = maxitr_*nev - mode = 1 - ! - iparam(1) = ishfts - iparam(3) = maxitr - iparam(7) = mode - ! - ! - ! M A I N L O O P (Reverse communication) - ! - !if (verbose>=4) write(out,"(/'Arpack: eigenvalues computed:')") - ! - if (verbose>=3) write(out,"(/'Arpack: n = ',i8,' nev = ',i8,' maxitr = ',i8,' tol = ',e12.5)") n,nev,maxitr,tol - ! - iter = 0 - ! - do while(ido<=1) - ! - iter = iter + 1 - ! - if (verbose>=3.and.mod(iter,10)==0) write(out,"(' iter = ',i8)") iter - ! - ! Repeatedly call the routine DSAUPD and take - ! actions indicated by parameter IDO until - ! either convergence is indicated or maxitr - ! has been exceeded. - ! - -#if (blacs_ > 0 || mpi_ > 0) - ! - call pdsaupd ( comm, ido, bmat, nloc, which, nev, tol, resid, & - ncv, v, ldv, iparam, ipntr, workd, workl, & - lworkl, info ) - ! -#else - ! - call dsaupd ( ido, bmat, n, which, nev, tol, resid, & - ncv, v, ldv, iparam, ipntr, workd, workl, & - lworkl, info ) - ! -#endif - ! - !if (verbose>=4.and.iparam(5)>0) then - ! ! - ! write(out,"(i8)") iparam(5) - ! ! - !endif - ! - if (ido==-1 .or. ido==1) then - ! - ! Perform matrix vector multiplication - ! y <--- OP*x - ! - call matvec_p(comm,n,nloc,nprocs,kstart(0:nprocs),dx,bterm,h,mv_buf,workd(ipntr(1)),workd(ipntr(2))) - ! - endif - ! - enddo - ! - ! Either we have convergence or there is an error. - ! - if ( info < 0 ) then - write(out,"(/'Error with _saupd, info = ',i8)") info - stop 'Error with _saupd' - endif - ! - ! - ! No fatal errors occurred. Post-Process using DSEUPD. - ! Computed eigenvalues may be extracted. - ! - ! Eigenvectors may also be computed now if desired. (indicated by rvec = .true.) - ! - rvec = .true. - ! -#if (blacs_ > 0) - ! - if (verbose>=5) write(out,"(/'Arpack: dseupd')") - ! - call pdseupd ( comm, rvec, 'All', select, d, v, ldv, sigma, & - bmat, n, which, nev, tol, resid, ncv, v, ldv, & - iparam, ipntr, workd, workl, lworkl, ierr ) - - - if (verbose>=5) write(out,"(/'Arpack: done!')") - - ! -#else - ! - write(out,"(/'Arpack was not activated yet. Please uncomment dsaupd and dseupd bellow')") - stop 'Arpack was not activated' - ! -#endif - ! - if ( ierr < 0 ) then - write(out,"(/'Error with_seupd, info = ',i8)") ierr - stop 'Error with _saupd' - endif - ! - nroots = iparam(5) - ! - e(1:nroots) = d(1:nroots,1) - ! - !$omp parallel do private(j) shared(h) schedule(dynamic) - do j=1,n - h(j,1:nroots) = v(j,1:nroots) - enddo - !$omp end parallel do - ! - ! Eigenvalues are returned in the first column - ! of the two dimensional array D and the - ! corresponding eigenvectors are returned in - ! the first NEV columns of the two dimensional - ! array V if requested. Otherwise, an - ! orthogonal basis for the invariant subspace - ! corresponding to the eigenvalues in D is - ! returned in V. - ! - !do j=1, nconv - ! - ! Compute the residual norm || A*x - lambda*x || - ! for the NCONV accurately computed eigenvalues and - ! eigenvectors. (iparam(5) indicates how many are - ! accurate to the requested tolerance) - ! - !call matvec(n,bterm,v(1,j), ax) - ! - !call daxpy(n, -d(j,1), v(1,j), 1, ax, 1) - !d(j,2) = dnrm2(n, ax, 1) - !d(j,2) = d(j,2) / abs(d(j,1)) - ! - !enddo - -! -! %---------------------------% -! | Done with program pdsdrv1.| -! %---------------------------% -! - 9000 continue -! -#if (blacs_ > 0) - call BLACS_GRIDEXIT ( comm ) - call BLACS_EXIT(0) -#endif -#if (mpi_ > 0) - call MPI_FINALIZE(rc) -#endif - - - deallocate(v,workl,workd,d,resid,select,mv_buf) - ! - call ArrayStop('diag_dseupd_p') - ! - if (verbose>=2) call TimerStop('diag_dseupd_p: diagonalization') - ! - end subroutine diag_dseupd_p - - - subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) integer,intent(in) :: n,bterm(n,2),maxitr_ @@ -3270,9 +2820,6 @@ subroutine dseupd_omp_arpack(n,bterm,nroots,factor,maxitr_,tol,h,e) end subroutine dseupd_omp_arpack - - - subroutine diag_propack(n,bterm,nroots,factor,maxiter,iverbose,tol,h,e) ! integer,intent(in) :: n,bterm(n,2),maxiter,iverbose diff --git a/perturbation.f90 b/perturbation.f90 index fb5ae4e..b657695 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -10616,8 +10616,6 @@ subroutine diagonalization_contract(jrot,gamma,dimen_s,mat,zpe,rlevel,total_root stop 'diagonalization_contract, mat - too small' endif ! - !call diag_dseupd_p(dimen_s,bterm,nroots,job%factor,job%maxiter,job%tolerance,mat,energy) - ! call dseupd_omp_arpack(dimen_s,bterm,nroots,job%factor,job%maxiter,job%tolerance,mat,energy) ! case('SEUPD') From 38887a7cc3b28b0d581a35baebe8e2693dd1f25a Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 29 Apr 2021 16:50:41 +0100 Subject: [PATCH 37/79] Allow MPI code to compile without MPI enabled Previously this code required MPI libraries to be linked. This is no longer the case and the code should compile without MPI being installed on the system. This is done through a combination of calls to dummy (when not using MPI) functions and `#ifdef TROVE_USE_MPI_` directives. --- mpi_aux.f90 | 27 ++++++++++++++++--- perturbation.f90 | 56 ++++++++++++++++++++++++++++++++++++---- tran.f90 | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 8 deletions(-) diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 252a80f..140d5e2 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -17,7 +17,7 @@ module mpi_aux public mpi_real_size, mpi_int_size #ifndef TROVE_USE_MPI_ - public MPI_Datatype + public MPI_Datatype, MPI_File, MPI_Status, MPI_Request #endif interface co_sum @@ -33,6 +33,20 @@ module mpi_aux type MPI_Datatype integer :: dummy = 0 end type MPI_Datatype + + type MPI_File + integer :: dummy = 0 + end type MPI_File + + type MPI_Status + integer :: dummy = 0 + end type MPI_Status + + type MPI_Request + integer :: dummy = 0 + end type MPI_Request + + parameter MPI_OFFSET_KIND=8 #endif integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv @@ -378,8 +392,9 @@ subroutine co_read_matrix_distr_ordered(x, longdim, lb, ub, infile) real(rk),dimension(:,lb:),intent(out) :: x integer,intent(in) :: longdim, lb, ub - type(MPI_File),intent(inout) :: infile + +#ifdef TROVE_USE_MPI_ type(MPI_Status) :: writestat integer(kind=MPI_Offset_kind) :: offset_start,offset_end integer :: readcount, ierr @@ -391,6 +406,7 @@ subroutine co_read_matrix_distr_ordered(x, longdim, lb, ub, infile) call MPI_File_read_ordered(infile,x,1,mpitype_column,writestat,ierr) call TimerStop('MPI_read_matrix') +#endif end subroutine co_read_matrix_distr_ordered @@ -398,8 +414,9 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) real(rk),dimension(:,lb:),intent(out) :: x integer,intent(in) :: longdim, lb, ub - type(MPI_File),intent(inout) :: infile + +#ifdef TROVE_USE_MPI_ type(MPI_Status) :: writestat integer(kind=MPI_Offset_kind) :: offset_start,offset_end integer :: readcount, ierr @@ -420,6 +437,7 @@ subroutine co_read_matrix_distr(x, longdim, lb, ub, infile) endif call TimerStop('MPI_read_matrix') +#endif end subroutine co_read_matrix_distr @@ -428,6 +446,8 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) real(rk),dimension(:,lb:),intent(in) :: x integer,intent(in) :: longdim, lb, ub type(MPI_File),intent(inout) :: outfile + +#ifdef TROVE_USE_MPI_ integer :: writecount, ierr integer(kind=MPI_Offset_kind) :: offset_start, offset_end type(MPI_Status) :: writestat @@ -439,6 +459,7 @@ subroutine co_write_matrix_distr(x, longdim, lb, ub, outfile) call MPI_File_write_ordered(outfile,x,1,mpitype_column,writestat,ierr) call TimerStop('MPI_write_matrix') +#endif end subroutine co_write_matrix_distr diff --git a/perturbation.f90 b/perturbation.f90 index b657695..ba17da5 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7725,12 +7725,15 @@ end subroutine PThamiltonian_contract !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine open_chkptfile_mpi(fileh, filename, mode) +#ifdef TROVE_USE_MPI_ use mpi_f08 +#endif use mpi_aux type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: filename, mode +#ifdef TROVE_USE_MPI_ integer :: amode, ierr select case(mode) @@ -7748,13 +7751,17 @@ subroutine open_chkptfile_mpi(fileh, filename, mode) !File errors indicate big trouble, so we set errors to be fatal - not likely to be recoverable call MPI_File_set_errhandler(fileh, MPI_ERRORS_ARE_FATAL) +#endif end subroutine open_chkptfile_mpi subroutine close_chkptfile_mpi(fileh) +#ifdef TROVE_USE_MPI_ use mpi_f08 +#endif use mpi_aux type(MPI_File), intent(inout) :: fileh +#ifdef TROVE_USE_MPI_ integer :: ierr call mpi_file_close(fileh, ierr) @@ -7762,26 +7769,25 @@ subroutine close_chkptfile_mpi(fileh) if (mpi_rank .eq. 0) write(*,*) "Error closing MPI-IO-formatted Vib. kinetic checkpoint file." stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error closing MATELEM MPI-IO input file" endif +#endif end subroutine close_chkptfile_mpi subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & ncontr, maxcontr, icontr) +#ifdef TROVE_USE_MPI_ use mpi_f08 +#endif use mpi_aux integer(ik),intent(in) :: jrot character(len=cl),intent(in) :: task type(MPI_File),intent(inout) :: fileh integer(ik),intent(in) :: dimen - !type(PTelementsT),intent(in) :: PT - !logical,intent(in) :: PTvibrational_me_calc - !type(PTcontrME),pointer :: grot(:,:),gcor(:) ! rot. kinetic part - !type(PTcontrME) :: hvib ! rot. kinetic part - integer(ik),intent(inout),optional :: ncontr integer(ik),intent(inout),optional :: maxcontr integer(ik),intent(in),optional :: icontr +#ifdef TROVE_USE_MPI_ type(MPI_File) :: fileh_slice character(len=cl) :: job_id,filename,readbuf integer(kind=MPI_Offset_kind) :: file_offset @@ -8336,16 +8342,20 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & !! endif !! ! end select +#endif end subroutine subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) +#ifdef TROVE_USE_MPI_ use mpi_f08 +#endif use mpi_aux implicit none integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: chkpt_type,suffix +#ifdef TROVE_USE_MPI_ integer(ik) :: ilen character(len=cl) :: readbuf,filename,jchar integer :: ierr @@ -8369,15 +8379,20 @@ subroutine divided_slice_open_mpi(islice,fileh,chkpt_type,suffix) if (mpi_rank .eq. 0) write (out,"(' [MPI] kinetic checkpoint slice ',a20,': header is missing or wrong',a)") filename,readbuf(1:ilen) stop 'PTrestore_rot_kinetic_matrix_elements_mpi - in slice - header missing or wrong' end if +#endif end subroutine divided_slice_open_mpi subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) +#ifdef TROVE_USE_MPI_ use mpi_f08 +#endif + use mpi_aux integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: fileh character(len=*),intent(in) :: chkpt_type +#ifdef TROVE_USE_MPI_ integer(ik) :: ilen character(len=cl) :: readbuf integer :: ierr @@ -8393,6 +8408,7 @@ subroutine divided_slice_close_mpi(islice,fileh,chkpt_type) end if ! call close_chkptfile_mpi(fileh) +#endif end subroutine divided_slice_close_mpi !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -15408,6 +15424,7 @@ subroutine PTcontracted_matelem_class(jrot) ! job_is ='Vib. matrix elements of the rot. kinetic part' if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_open(mpi_comm_world, job%kinetmat_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) call MPI_File_set_errhandler(chkptMPIIO, MPI_ERRORS_ARE_FATAL) @@ -15432,6 +15449,7 @@ subroutine PTcontracted_matelem_class(jrot) endif call MPI_Barrier(mpi_comm_world, ierr) call MPI_File_seek_shared(chkptMPIIO, mpioffset, MPI_SEEK_END, ierr) +#endif else call IOStart(trim(job_is),chkptIO) ! @@ -15620,12 +15638,14 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if(mpi_rank.eq.0) then !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write_shared(chkptMPIIO,'g_rot',5,mpi_character,mpi_status_ignore,ierr) !else ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif +#endif else write(chkptIO) 'g_rot' endif @@ -15708,12 +15728,14 @@ subroutine PTcontracted_matelem_class(jrot) if (trim(job%IOkinet_action)=='SAVE'.and..not.job%IOmatelem_split) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ ! call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) if(mpi_rank.eq.0) then call MPI_File_write_shared(chkptMPIIO,'g_cor',5,mpi_character,mpi_status_ignore,ierr) !else ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif +#endif else write(chkptIO) 'g_cor' endif @@ -16179,6 +16201,7 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_split.or.job%iswap(1)==0)) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if(mpi_rank.eq.0) then !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) call MPI_File_write_shared(chkptMPIIO,'hvib',4,mpi_character,mpi_status_ignore,ierr) @@ -16186,6 +16209,7 @@ subroutine PTcontracted_matelem_class(jrot) ! call MPI_File_write_shared(chkptMPIIO,'',0,mpi_character,mpi_status_ignore,ierr) endif call co_write_matrix_distr(hvib%me,mdimen, startdim, enddim,chkptMPIIO) +#endif else write(chkptIO) 'hvib' write(chkptIO) hvib%me @@ -16202,12 +16226,14 @@ subroutine PTcontracted_matelem_class(jrot) (.not.job%IOmatelem_split.or.job%iswap(1)==0 ) ) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call mpi_barrier(mpi_comm_world, ierr) call MPI_File_seek_shared(chkptMPIIO, mpioffset, MPI_SEEK_END) if(mpi_rank.eq.0) then call MPI_File_write_shared(chkptMPIIO,'End Kinetic part',16,mpi_character,mpi_status_ignore,ierr) endif call MPI_File_close(chkptMPIIO, ierr) +#endif else write(chkptIO) 'End Kinetic part' close(chkptIO,status='keep') @@ -16238,6 +16264,7 @@ subroutine PTcontracted_matelem_class(jrot) ! job_is ='external field contracted matrix elements for J=0' if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_open(mpi_comm_world, job%extFmat_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, chkptMPIIO, ierr) mpioffset=0 call MPI_File_set_size(chkptMPIIO, mpioffset, ierr) @@ -16262,6 +16289,7 @@ subroutine PTcontracted_matelem_class(jrot) ! ! ! call TimerStop('mpiiosingle') !AT endif +#endif else call IOStart(trim(job_is),chkptIO) ! @@ -16328,12 +16356,14 @@ subroutine PTcontracted_matelem_class(jrot) ! always store the matrix elements of the extF moment ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if(mpi_rank.eq.0) then call MPI_File_write_shared(chkptMPIIO,imu,1,mpi_integer,mpi_status_ignore,ierr) endif !call mpi_barrier(mpi_comm_world,ierr) ! call co_write_matrix_distr(extF_t,mdimen, startdim, enddim,chkptMPIIO) +#endif else write(chkptIO) imu ! @@ -16381,8 +16411,11 @@ subroutine PTcontracted_matelem_class(jrot) ! endif ! +#ifdef TROVE_USE_MPI_ call mpi_barrier(mpi_comm_world,ierr) +#endif if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if (mpi_rank.eq.0) then !AT if(.not.job%IOextF_divide) then !call MPI_File_seek(chkptMPIIO, mpioffset, MPI_SEEK_END) @@ -16392,6 +16425,7 @@ subroutine PTcontracted_matelem_class(jrot) endif endif call MPI_File_close(chkptMPIIO, ierr) +#endif else if (.not.job%IOextF_divide) write(chkptIO) 'End external field' endif @@ -16521,6 +16555,8 @@ subroutine write_divided_slice_mpi(islice,name,suffix,N,field) character(len=*),intent(in) :: name,suffix integer(ik),intent(in) :: N real(rk),dimension(:,:),intent(in) :: field + +#ifdef TROVE_USE_MPI_ character(len=4) :: jchar character(len=cl) :: filename character(len=cl) :: job_is @@ -16550,6 +16586,7 @@ subroutine write_divided_slice_mpi(islice,name,suffix,N,field) ! call MPI_File_close(chkptMPIIO, ierr) ! +#endif end subroutine write_divided_slice_mpi @@ -16597,6 +16634,8 @@ subroutine divided_slice_open_mpi(islice,chkptIO,name,suffix) integer(ik),intent(in) :: islice type(MPI_File),intent(inout) :: chkptIO character(len=*),intent(in) :: name,suffix + +#ifdef TROVE_USE_MPI_ character(len=4) :: jchar character(len=cl) :: buf,filename,job_is integer(ik) :: ilen @@ -16628,6 +16667,7 @@ subroutine divided_slice_open_mpi(islice,chkptIO,name,suffix) !stop 'PTrestore_rot_kinetic_matrix_elements - in slice - header missing or wrong' endif endif +#endif end subroutine divided_slice_open_mpi ! subroutine divided_slice_close(islice,chkptIO,name) @@ -16668,15 +16708,19 @@ subroutine divided_slice_close_mpi(islice,chkptIO,name) ilen = LEN_TRIM(name) ! if(mpi_rank .eq. 0) then +#ifdef TROVE_USE_MPI_ call MPI_File_read_shared(chkptIO, buf, ilen, mpi_character, mpi_status_ignore, ierr) if ( trim(buf(1:ilen))/=trim(name) ) then write (out,"(' divided_slice_close, kinetic checkpoint slice ',a,': footer is missing or wrong',a)") trim(name),buf(1:ilen) call MPI_Abort(mpi_comm_world, 1) !stop 'divided_slice_close - in slice - footer missing or wrong' endif +#endif endif ! +#ifdef TROVE_USE_MPI_ call MPI_File_close(chkptIO, ierr) +#endif ! end subroutine divided_slice_close_mpi @@ -34051,6 +34095,7 @@ subroutine PTstoreMPI_icontr_cnu(maxcontracts,iunit,dir) ! case ('SAVE') ! +#ifdef TROVE_USE_MPI_ call mpi_file_write(iunit, maxcontracts, 1,mpi_integer,mpi_status_ignore,ierr) ! call mpi_file_write(iunit, 'icontr_cnu', 10,mpi_character,mpi_status_ignore,ierr) @@ -34063,6 +34108,7 @@ subroutine PTstoreMPI_icontr_cnu(maxcontracts,iunit,dir) call mpi_file_write(iunit, pt%icontr_ideg(0:pt%nclasses,1:maxcontracts), (1+pt%nclasses)*maxcontracts, mpi_integer, & mpi_status_ignore, ierr) +#endif end select end subroutine PTstorempi_icontr_cnu diff --git a/tran.f90 b/tran.f90 index 2e4694a..3d89204 100644 --- a/tran.f90 +++ b/tran.f90 @@ -1369,7 +1369,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) !$omp parallel do private(icoeff,irow,ib,iterm,ielem,blacs_row,blacs_col,i_local,j_local) default(shared) schedule(dynamic) do icoeff = 1,dimen ! +#ifdef TROVE_USE_MPI_ call infog2l(icoeff,iroot,desc_psi,nprow,npcol,myprow,mypcol,i_local,j_local,blacs_row,blacs_col) +#endif if (myprow.eq.blacs_row.and.mypcol.eq.blacs_col) then ! psi(i_local,j_local) = 0 @@ -1469,7 +1471,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) read(iunit, rec = irec) vec ! do i=1,dimen +#ifdef TROVE_USE_MPI_ call infog2l(i,iroot,desc_psi,nprow,npcol,myprow,mypcol,i_local,j_local,blacs_row,blacs_col) +#endif if (myprow.eq.blacs_row.and.mypcol.eq.blacs_col) then psi(i_local,j_local) = vec(i) endif @@ -1486,9 +1490,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! TODO TEMP intel bugaround - explicitly transpose psi into psi_t if (blacs_size .gt. 1) then +#ifdef TROVE_USE_MPI_ write(out,*) "Explicitly transposing psi into psi_t" call pdtran(Neigenroots, dimen, 1.0d0, psi, 1, 1, desc_psi, 0.0d0, psi_t, & 1, 1, desc_mat_t) +#endif endif ! if (job%verbose>=3) write(out,"(' ...done!')") @@ -1507,6 +1513,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) job_is ='Eigen-vib. matrix elements of the rot. kinetic part' ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_open(mpi_comm_world, job%kineteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) call MPI_File_set_errhandler(fileh_w, MPI_ERRORS_ARE_FATAL) mpioffset = 0 @@ -1527,6 +1534,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) mpioffset = 0 treat_vibration = .false. endif +#endif else call IOStart(trim(job_is),chkptIO) ! @@ -1576,7 +1584,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! +#ifdef TROVE_USE_MPI_ call MPI_File_open(mpi_comm_world, job%kinetmat_file, mpi_mode_rdonly, mpi_info_null, fileh, ierr) +#endif ! The eigen-vibrational (J=0) matrix elements of the rotational and coriolis ! kinetic parts are being computed here. ! @@ -1587,11 +1597,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) task = 'rot' ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) if(mpi_rank.eq.0) call MPI_File_write_shared(fileh_w, 'g_rot', 5, mpi_character, mpi_status_ignore, ierr) call mpi_barrier(MPI_COMM_WORLD, ierr) ! call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) +#endif else write(chkptIO) 'g_rot' ! @@ -1617,12 +1629,14 @@ subroutine TRconvert_matel_j0_eigen(jrot) islice = 0 ! if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) !call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND),MPI_SEEK_END) call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) +#endif endif ! do k1 = 1,3 @@ -1648,7 +1662,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) +#endif else read(iunit) gmat endif @@ -1657,10 +1673,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! if (blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) +#endif else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& gmat,dimen,beta,mat_t,Neigenroots) @@ -1683,7 +1701,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) +#endif else write (chkptIO) mat_s endif @@ -1696,6 +1716,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! Reset view to flat file if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ read_offset = read_offset + 9*int(dimen,MPI_OFFSET_KIND)*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) @@ -1706,6 +1727,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_SET) !write_offset = 0 !call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) +#endif endif ! @@ -1719,11 +1741,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) task = 'cor' if (trim(job%kinetmat_format).eq.'MPIIO') then ! +#ifdef TROVE_USE_MPI_ call restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,task,fileh) ! if(mpi_rank.eq.0) call MPI_File_write_shared(fileh_w, 'g_cor', 5, mpi_character, mpi_status_ignore, ierr) call MPI_Barrier(MPI_COMM_WORLD, ierr) !call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) +#endif else call restore_rot_kinetic_matrix_elements(jrot,treat_vibration,task,iunit) ! @@ -1739,11 +1763,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) islice = 9 ! if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, gmat_block_type, "native", MPI_INFO_NULL, ierr) call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) +#endif endif ! !do k1 = 1,FLNmodes @@ -1771,7 +1797,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_read_all(fileh, gmat, size(gmat), mpi_double_precision, mpi_status_ignore, ierr) +#endif else read(iunit) gmat endif @@ -1779,10 +1807,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! if (blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& gmat,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) +#endif else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& gmat,dimen,beta,mat_t,Neigenroots) @@ -1806,7 +1836,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) +#endif else write (chkptIO) mat_s endif @@ -1819,6 +1851,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! ! Reset view to flat file if ((.not.job%IOmatelem_split) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ read_offset = read_offset + 3*int(dimen,MPI_OFFSET_KIND)*dimen*mpi_real_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek(fileh, read_offset, MPI_SEEK_SET) @@ -1828,13 +1861,16 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_set_view(fileh_w, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) !write_offset = 0 +#endif endif ! if (job%verbose>=5) call TimerStop('J0-convertion for g_cor') ! if ((.not.job%IOmatelem_split.or.job%iswap(1)==1).and.(mpi_rank.eq.0)) then if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_write_shared(fileh_w, 'End Kinetic part', 16, mpi_character, mpi_status_ignore, ierr) +#endif else write(chkptIO) 'End Kinetic part' endif @@ -1842,7 +1878,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (.not.job%vib_rot_contr) then if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_close(fileh_w, ierr) +#endif else close(chkptIO,status='keep') endif @@ -1864,7 +1902,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_close(fileh, ierr) +#endif else close(chkptIO,status='keep') endif @@ -1898,6 +1938,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if (trim(job%kinetmat_format).eq.'MPIIO') then ! +#ifdef TROVE_USE_MPI_ call MPI_File_open(mpi_comm_world, filename, mpi_mode_rdonly, mpi_info_null, fileh, ierr) ! call MPI_File_read_all(fileh, buf20, 7, mpi_character, mpi_status_ignore, ierr) @@ -1920,6 +1961,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) stop 'restore_Extvib_matrix_elements - in file - illegal ncontracts ' end if ! +#endif else open(iunit,form='unformatted',action='read',position='rewind',status='old',file=filename) ! @@ -1948,6 +1990,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call IOStart(trim(job_is),chkptIO) ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ ! call mpi_file_open(mpi_comm_world, job%exteigen_file, mpi_mode_wronly+mpi_mode_create, mpi_info_null, fileh_w, ierr) call mpi_file_set_errhandler(fileh_w, mpi_errors_are_fatal) @@ -1966,6 +2009,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call mpi_barrier(mpi_comm_world, ierr) call mpi_file_seek(fileh_w, int(0,mpi_offset_kind), mpi_seek_end) ! +#endif else ! open(chkptio,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) @@ -1994,11 +2038,13 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! if ((.not.job%IOextF_divide) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call MPI_File_get_position(fileh, read_offset, ierr) call MPI_File_set_view(fileh, read_offset, mpi_byte, extF_block_type, "native", MPI_INFO_NULL, ierr) call MPI_File_get_position_shared(fileh_w, write_offset, ierr) call MPI_File_set_view(fileh_w, write_offset, mpi_byte, mat_s_block_type, "native", MPI_INFO_NULL, ierr) +#endif endif ! do imu = fitting%iparam(1),fitting%iparam(2) @@ -2020,9 +2066,11 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_read_all(fileh, imu_t, 1, mpi_integer, mpi_status_ignore, ierr) ! call MPI_File_read_all(fileh, extF_me, size(extF_me), mpi_double_precision, mpi_status_ignore, ierr) +#endif else read(iunit) imu_t ! @@ -2032,10 +2080,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) endif ! if(blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ call pdgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,1,1,desc_psi,& extF_me,1,1,desc_gmat,beta,mat_t,1,1,desc_mat_t) call pdgemm('N','T',Neigenroots,Neigenroots,dimen,alpha,mat_t,1,1,desc_mat_t,& psi_t,1,1,desc_mat_t,beta,mat_s,1,1,desc_mat_s) +#endif else call dgemm('T','N',Neigenroots,dimen,dimen,alpha,psi,dimen,& extF_me,dimen,beta,mat_t,Neigenroots) @@ -2072,10 +2122,12 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if(mpi_rank.eq.0) call MPI_File_write(fileh_w, imu, 1, mpi_integer, mpi_status_ignore, ierr) call MPI_Barrier(mpi_comm_world, ierr) call MPI_File_seek_shared(fileh_w, int(0,MPI_OFFSET_KIND), MPI_SEEK_END) call MPI_File_write_all(fileh_w, mat_s, size(mat_s), mpi_double_precision, mpi_status_ignore, ierr) +#endif else write(chkptIO) imu write(chkptIO) mat_s @@ -2094,6 +2146,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) enddo ! Reset view to flat file if ((.not.job%IOextF_divide) .and. blacs_size.gt.1) then +#ifdef TROVE_USE_MPI_ read_offset = read_offset + (fitting%iparam(2)-fitting%iparam(1)+1)*int(ncontr_t,MPI_OFFSET_KIND)*ncontr_t*mpi_real_size & + (fitting%iparam(2)-fitting%iparam(1)+1)*mpi_int_size call MPI_File_set_view(fileh, int(0,MPI_OFFSET_KIND), mpi_byte, mpi_byte, "native", MPI_INFO_NULL, ierr) @@ -2104,6 +2157,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_SET) !write_offset = 0 !call MPI_File_seek_shared(fileh_w, write_offset, MPI_SEEK_END) +#endif endif ! if (allocated(extF_me)) deallocate(extF_me) @@ -2112,7 +2166,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOextF_divide) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_read_all(fileh, buf20, 18, mpi_character, mpi_status_ignore, ierr) +#endif else read(iunit) buf20(1:18) endif @@ -2123,7 +2179,9 @@ subroutine TRconvert_matel_j0_eigen(jrot) end if ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ call MPI_File_close(fileh, ierr) +#endif else close(iunit,status='keep') endif @@ -2136,8 +2194,10 @@ subroutine TRconvert_matel_j0_eigen(jrot) if (.not.job%IOextF_divide.or.job%IOextF_stitch) then ! if (trim(job%kinetmat_format).eq.'MPIIO') then +#ifdef TROVE_USE_MPI_ if(mpi_rank.eq.0) call MPI_File_write(fileh_w, 'End external field', 18, mpi_character, mpi_status_ignore, ierr) call MPI_File_close(fileh_w, ierr) +#endif else write(chkptIO) 'End external field' close(chkptIO,status='keep') @@ -2213,6 +2273,8 @@ subroutine divided_slice_write_mpi(islice,name,suffix,N,field,block_type) integer(ik),intent(in) :: N real(rk),dimension(:,:),intent(in) :: field type(MPI_Datatype),intent(in) :: block_type + +#ifdef TROVE_USE_MPI_ character(len=4) :: jchar character(len=cl) :: filename type(MPI_File) :: chkptMPIIO @@ -2245,6 +2307,7 @@ subroutine divided_slice_write_mpi(islice,name,suffix,N,field,block_type) ! call MPI_File_close(chkptMPIIO, ierr) ! +#endif end subroutine divided_slice_write_mpi ! ! @@ -2321,6 +2384,7 @@ subroutine divided_slice_read_mpi(islice,name,suffix,N,field,block_type,ierr) real(rk),dimension(:,:),intent(out) :: field type(MPI_Datatype),intent(in) :: block_type integer(ik),intent(out) :: ierr +#ifdef TROVE_USE_MPI_ character(len=4) :: jchar type(MPI_File) :: chkptMPIIO character(len=cl) :: buf,filename,job_is @@ -2379,6 +2443,7 @@ subroutine divided_slice_read_mpi(islice,name,suffix,N,field,block_type,ierr) ! call MPI_File_close(chkptMPIIO, ierr) ! +#endif end subroutine divided_slice_read_mpi ! ! @@ -2738,6 +2803,7 @@ subroutine restore_rot_kinetic_matrix_elements_mpi(jrot,treat_vibration,kinetic_ logical,intent(in) :: treat_vibration character(len=3),intent(in) :: kinetic_part type(MPI_File),intent(in) :: fileh +#ifdef TROVE_USE_MPI_ ! integer(ik) :: iclasses,alloc,ilevel,ib,max_deg_size,nclasses,islice character(len=cl) :: job_is @@ -2968,6 +3034,7 @@ subroutine find_groundstate_icontr(Nclasses) ! end subroutine find_groundstate_icontr ! +#endif end subroutine restore_rot_kinetic_matrix_elements_mpi ! ! From d55c95108b0d7076b04ad647e47e241cb3beb459 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Fri, 7 May 2021 11:24:12 +0100 Subject: [PATCH 38/79] Add back missing #endif missed during merging --- plasma.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/plasma.f90 b/plasma.f90 index 6e50468..6a2ee44 100644 --- a/plasma.f90 +++ b/plasma.f90 @@ -201,6 +201,7 @@ subroutine plasma_diag_dsytrdx(n,a,w,nroots,Ethres_) STOP END IF CALL USETPLASMAENV() +#endif ! real_end = get_real_time() cpu_end = get_cpu_time () From 9a201a9b774c60d8c3f889fd3d8e6a635086876d Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Fri, 7 May 2021 11:32:07 +0100 Subject: [PATCH 39/79] Ensure merged TROVE compiles using gfortran --- fields.f90 | 2 +- mpi_aux.f90 | 2 +- refinement.f90 | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fields.f90 b/fields.f90 index 684565f..ad1219f 100644 --- a/fields.f90 +++ b/fields.f90 @@ -28,7 +28,7 @@ module fields public FLread_coeff_matelem,FLinitilize_Potential_original public FLcalc_poten_kinet_dvr,job,FLcalcsT,FLenercutT,FLeigenfile,FLinitilize_Potential,FLinit_External_field_andrey public FLextF_coeffs,FL_rotation_energy_surface,FLextF_matelem,FLread_iorder_send - public jobt, trove, bset, analysis, action, FLL2_coeffs, FLread_fields_dimension_field,FLread_IndexQ_field + public jobt, trove, manifold, bset, analysis, action, FLL2_coeffs, FLread_fields_dimension_field,FLread_IndexQ_field ! public BaisSetT,Basis1DT,FL_fdf,FLNmodes,FLanalysisT,FLresT,FLpartfunc,FLactionT,FLfinitediffs,FLpoten_linearized,FLread_ZPE public FLJGammaLevelT diff --git a/mpi_aux.f90 b/mpi_aux.f90 index 140d5e2..a020bc1 100644 --- a/mpi_aux.f90 +++ b/mpi_aux.f90 @@ -46,7 +46,7 @@ module mpi_aux integer :: dummy = 0 end type MPI_Request - parameter MPI_OFFSET_KIND=8 + integer, parameter :: MPI_OFFSET_KIND=8 #endif integer,dimension(:),allocatable :: proc_sizes, proc_offsets, send_or_recv diff --git a/refinement.f90 b/refinement.f90 index fa266fc..203c685 100644 --- a/refinement.f90 +++ b/refinement.f90 @@ -1,8 +1,8 @@ module refinement use fields, only : manifold,job,fitting,j0fit,FLNmodes,FLindexQ,FLQindex,FL_fdf,FLpoten4xi,& - use accuracy, only : ik, hik, rk, ark, cl, wl, out, small_ FLfinitediffs_2d,FLpoten_linearized,analysis,action + use accuracy, only : ik, hik, rk, ark, cl, wl, out, small_ use timer, only : IOstart,Arraystart,Arraystop,Arrayminus,Timerstart,Timerstop,MemoryReport,TimerReport use molecules, only : MLcoord_direct,MLinvmat,MLinvmatark,MLcoordinate_transform_func,MLpotentialfunc use moltype, only : manifold,molec, extF,ML_check_steps,MLdiag_ulen,MLlinur,MLfromlocal2cartesian From 3f1d4be5d449458d729280c5c05c9848ae145859 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Fri, 7 May 2021 11:36:07 +0100 Subject: [PATCH 40/79] Fix makefile dependency --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 1baf2bb..e3ac0bc 100644 --- a/makefile +++ b/makefile @@ -162,7 +162,7 @@ extfield.o: extfield.f90 accuracy.o timer.o rotme_cart_tens.o richmol_data.o fie fields.o: fields.f90 accuracy.o molecules.o lapack.o me_str.o me_bnd.o me_numer.o me_rot.o timer.o moltype.o symmetry.o input.o accuracy.o moltype.o accuracy.o moltype.o accuracy.o moltype.o fwigxjpf.o: fwigxjpf.f90 $(WIGXJPF_LIB) grid.o: grid.f90 accuracy.o fields.o splines.o iso_c_binding.o iso_c_binding.o -input.o: input.f90 +input.o: input.f90 accuracy.o kin_xy2.o: kin_xy2.f90 accuracy.o moltype.o lapack.o: lapack.f90 accuracy.o timer.o ltp2011_water_dipole_surface.o: ltp2011_water_dipole_surface.f90 From ae500c56d27e12a1486096e15e957e190199f957 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Mon, 10 May 2021 17:24:25 +0100 Subject: [PATCH 41/79] Fix segfault when compiling with -O0 --- tran.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tran.f90 b/tran.f90 index 18ffafe..be5dccf 100644 --- a/tran.f90 +++ b/tran.f90 @@ -328,7 +328,7 @@ subroutine index_correlation(njval, jval) end if ! !$omp parallel default(shared) & - !$omp& private(cnu_i, cnu_j, nclasses, info, icase, jcase, ilambda, jlambda, found, my_fmt) & + !$omp& private(cnu_i, cnu_j, info, icase, jcase, ilambda, jlambda, found, my_fmt) & !$omp& private(iroot,jcontr,ilevel,ideg,k,tau) allocate(cnu_i(1:nclasses),cnu_j(1:nclasses),stat = info) if (info /= 0) stop 'index_correlation: cnu_i allocation error' From d64cc5ad4a63a1ed5c702ea1490a24361cfa0bc6 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Tue, 11 May 2021 18:41:23 +0100 Subject: [PATCH 42/79] Fix incorrect capitalisation of header when writing external field --- tran.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tran.f90 b/tran.f90 index be5dccf..ee92b9b 100644 --- a/tran.f90 +++ b/tran.f90 @@ -2090,7 +2090,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) ! if(mpi_rank.eq.0) then call mpi_file_write_shared(fileh_w, '[MPIIO]', 7, mpi_character, mpi_status_ignore, ierr) - call mpi_file_write_shared(fileh_w, 'start external field', 20, mpi_character, mpi_status_ignore, ierr) + call mpi_file_write_shared(fileh_w, 'Start external field', 20, mpi_character, mpi_status_ignore, ierr) endif ! ! store the matrix elements @@ -2103,7 +2103,7 @@ subroutine TRconvert_matel_j0_eigen(jrot) else ! open(chkptio,form='unformatted',action='write',position='rewind',status='replace',file=job%exteigen_file) - write(chkptio) 'start external field' + write(chkptio) 'Start external field' ! ! store the matrix elements ! From f776b927ece2dd31ea955131c4edc34e3b227c12 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 19 May 2021 17:43:20 +0100 Subject: [PATCH 43/79] Add docstrings to compare_results.py --- test/compare_results.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 7476b6f..397e11a 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -8,7 +8,7 @@ QUANTUM_ENERGY_IDX=4 def find_start_end_block(lines, blockname): - """Identifies start and end of blocks""" + """Identify start and end of blocks""" for i, line in enumerate(lines): if "Start " + blockname in line: start_idx = i @@ -17,29 +17,35 @@ def find_start_end_block(lines, blockname): return start_idx, end_idx def extract_quantum_block(lines): - """Extracts only the quantum block from a chk file""" + """Extract only the quantum block from a chk file""" idxs = find_start_end_block(lines, "Quantum") temp = lines[idxs[0]+4:idxs[-1]] return [line.split() for line in temp] def extract_quantum_energies(block): + """Extract quantum energies from entire block""" return [float(line[QUANTUM_ENERGY_IDX]) for line in block] def read_chk_file(fname): + """Read checkpoint file as a list of lines""" with open(fname, 'r') as fp: + # Strip newlines and lines with comments lines = [line for line in fp.readlines() if line != '\n' and '<-' not in line] return lines def read_energy_column(fname, column_no): + """Extract energies from a column in file fname""" lines = read_chk_file(fname) lines = lines[:-1] # remove last line (which is not part of the actual data) return [float(line.split()[column_no]) for line in lines] def read_quantum_block(fname): + """Extract quantum energies from fname""" lines = read_chk_file(fname) return extract_quantum_block(lines) def compare_columns(fname1, fname2, column_no, precision=1e-10): + """Compare two energy files""" energies1 = read_energy_column(fname1, column_no) energies2 = read_energy_column(fname2, column_no) @@ -48,6 +54,7 @@ def compare_columns(fname1, fname2, column_no, precision=1e-10): f"{e1} and {e2} differ by {abs(e1-e2)}" def compare_quantum_files(fname1, fname2, precision=1e-10): + """Compare two files in quantum form""" energy_block1 = read_quantum_block(fname1) energy_block2 = read_quantum_block(fname2) From d8f1a4c815be23d9e79becbae2e44d3472a2ea5c Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 19 May 2021 18:52:32 +0100 Subject: [PATCH 44/79] Refactor comparison script and add testing of intensity log --- test/compare_results.py | 107 ++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 27 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 397e11a..d82c828 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -3,12 +3,16 @@ import sys import os import argparse -from pytest import approx +import math QUANTUM_ENERGY_IDX=4 +INTENSITY_INDICES = { + "einstein": 2, + "nu": 3, +} def find_start_end_block(lines, blockname): - """Identify start and end of blocks""" + """Identify start and end of blocks in .chk files""" for i, line in enumerate(lines): if "Start " + blockname in line: start_idx = i @@ -16,52 +20,101 @@ def find_start_end_block(lines, blockname): end_idx = i return start_idx, end_idx -def extract_quantum_block(lines): - """Extract only the quantum block from a chk file""" - idxs = find_start_end_block(lines, "Quantum") - temp = lines[idxs[0]+4:idxs[-1]] - return [line.split() for line in temp] +def find_log_block(lines, blockname): + """Identify start and end of block in log file""" + start_idx = end_idx = None + for i, line in enumerate(lines): + if blockname in line: + start_idx = i + break + + # find first "done" after we've found blockname + for i, line in enumerate(lines[start_idx:]): + if "done" in line: + end_idx = i + start_idx + break -def extract_quantum_energies(block): - """Extract quantum energies from entire block""" - return [float(line[QUANTUM_ENERGY_IDX]) for line in block] + if start_idx is not None: + raise Exception(f"{blockname} not found") + elif end_idx is not None: + raise Exception(f"Could not find end of {blockname}") + + return start_idx, end_idx def read_chk_file(fname): """Read checkpoint file as a list of lines""" with open(fname, 'r') as fp: # Strip newlines and lines with comments - lines = [line for line in fp.readlines() if line != '\n' and '<-' not in line] + lines = fp.readlines() return lines +def strip_newlines(lines): + """Remove all lines which are just newlines""" + return [line for line in lines if line != '\n'] + +def strip_comments(lines): + """Remove all lines with comments""" + return [line for line in lines if '<-' not in line] + +def extract_column(lines, column_no): + """Extract column of numbers from list of str lines""" + return [float(line.split()[column_no]) for line in lines] + def read_energy_column(fname, column_no): """Extract energies from a column in file fname""" lines = read_chk_file(fname) - lines = lines[:-1] # remove last line (which is not part of the actual data) - return [float(line.split()[column_no]) for line in lines] + lines = strip_newlines(strip_comments(lines)) + # remove last line (which is not part of the actual data) + lines = lines[:-1] + return extract_column(lines, column_no) -def read_quantum_block(fname): +def read_quantum_energies(fname): """Extract quantum energies from fname""" lines = read_chk_file(fname) - return extract_quantum_block(lines) + lines = strip_newlines(strip_comments(lines)) + start, end = find_start_end_block(lines, "Quantum") + # take out first 4 lines and last line of block + lines = lines[start+4:end] + return extract_column(lines, QUANTUM_ENERGY_IDX) -def compare_columns(fname1, fname2, column_no, precision=1e-10): +def read_intensity_column(fname, column_name): + """Extract quantum energies from fname""" + if column_name not in INTENSITY_INDICES.keys(): + raise Exception(f"Intensity column name must be one of {INTENSITY_INDICES.keys()}") + + lines = read_chk_file(fname) + lines = strip_newlines(lines) + start, end = find_log_block(lines, "Linestrength") + # take out first 4 lines and last line of block + lines = lines[start+1:end] + + return extract_column(lines, INTENSITY_INDICES[column_name]) + +def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): + """Compare two columns of numbers to a give absolute or relative precision""" + for i, (e1, e2) in enumerate(zip(col1, col2)): + if not math.isclose(e1, e2, abs_tol=abs_precision, rel_tol=rel_precision): + print(f"{e1} and {e2} differ by {abs(e1-e2)} at index {i}") + + +def compare_energy_files(fname1, fname2, column_no, precision=1e-10): """Compare two energy files""" energies1 = read_energy_column(fname1, column_no) energies2 = read_energy_column(fname2, column_no) - - for e1, e2 in zip(energies1, energies2): - assert e1 == approx(e2, abs=precision), \ - f"{e1} and {e2} differ by {abs(e1-e2)}" + compare_columns(energies1, energies2, abs_precision=precision) def compare_quantum_files(fname1, fname2, precision=1e-10): """Compare two files in quantum form""" - energy_block1 = read_quantum_block(fname1) - energy_block2 = read_quantum_block(fname2) + energies1 = read_quantum_energies(fname1) + energies2 = read_quantum_energies(fname2) + compare_columns(energies1, energies2, rel_precision=precision) - energies1 = extract_quantum_energies(energy_block1) - energies2 = extract_quantum_energies(energy_block2) - - assert energies1 == approx(energies2, rel=precision) +def compare_intensity_files(fname1, fname2, precision=1e-10): + """Compare two files in quantum form""" + for col_name in INTENSITY_INDICES.keys(): + col1 = read_intensity_column(fname1, col_name) + col2 = read_intensity_column(fname2, col_name) + compare_columns(col1, col2, rel_precision=precision) def main(): parser = argparse.ArgumentParser(description='Compare output files from TROVE') @@ -87,7 +140,7 @@ def main(): if args.kind == 'column': for fname in filelist: - compare_columns(os.path.join(folder1, fname), os.path.join(folder2, fname), args.column, precision=args.precision) + compare_energy_files(os.path.join(folder1, fname), os.path.join(folder2, fname), args.column, precision=args.precision) elif args.kind == 'quantum': for fname in filelist: try: From c9a12eec6007f23cd06ffdc9b8cd2ff461e42f50 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 19 May 2021 18:53:54 +0100 Subject: [PATCH 45/79] Remove dependency on pytest (and thus pipenv) --- test/Pipfile | 12 ------- test/Pipfile.lock | 84 ----------------------------------------------- 2 files changed, 96 deletions(-) delete mode 100644 test/Pipfile delete mode 100644 test/Pipfile.lock diff --git a/test/Pipfile b/test/Pipfile deleted file mode 100644 index e0879a8..0000000 --- a/test/Pipfile +++ /dev/null @@ -1,12 +0,0 @@ -[[source]] -url = "https://pypi.org/simple" -verify_ssl = true -name = "pypi" - -[packages] -pytest = "*" - -[dev-packages] - -[requires] -python_version = "3.9" diff --git a/test/Pipfile.lock b/test/Pipfile.lock deleted file mode 100644 index a46855a..0000000 --- a/test/Pipfile.lock +++ /dev/null @@ -1,84 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "8e1fa4cebfb362d534adf1f4e4dbd8e9fa3332fd94ae9f4df6e5e2ccc61ffa2d" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.9" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "attrs": { - "hashes": [ - "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6", - "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.3.0" - }, - "iniconfig": { - "hashes": [ - "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3", - "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32" - ], - "version": "==1.1.1" - }, - "packaging": { - "hashes": [ - "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", - "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==20.9" - }, - "pluggy": { - "hashes": [ - "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0", - "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==0.13.1" - }, - "py": { - "hashes": [ - "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3", - "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a" - ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==1.10.0" - }, - "pyparsing": { - "hashes": [ - "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", - "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", - "version": "==2.4.7" - }, - "pytest": { - "hashes": [ - "sha256:671238a46e4df0f3498d1c3270e5deb9b32d25134c99b7d75370a68cfbe9b634", - "sha256:6ad9c7bdf517a808242b998ac20063c41532a570d088d77eec1ee12b0b5574bc" - ], - "index": "pypi", - "version": "==6.2.3" - }, - "toml": { - "hashes": [ - "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", - "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" - ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", - "version": "==0.10.2" - } - }, - "develop": {} -} From c648d22eaefefe51c9ceb1b8d143674e8749d76b Mon Sep 17 00:00:00 2001 From: Jamie J Quinn Date: Wed, 2 Jun 2021 10:14:56 +0100 Subject: [PATCH 46/79] Update test/compare_results.py Co-authored-by: ageorgou <1186102+ageorgou@users.noreply.github.com> --- test/compare_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index d82c828..5638b1b 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -85,7 +85,7 @@ def read_intensity_column(fname, column_name): lines = read_chk_file(fname) lines = strip_newlines(lines) start, end = find_log_block(lines, "Linestrength") - # take out first 4 lines and last line of block + # take out first and last line of block lines = lines[start+1:end] return extract_column(lines, INTENSITY_INDICES[column_name]) From ee86bbb4b03b01a7a937ed0b4dc47ba02f154810 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 20 May 2021 11:27:36 +0100 Subject: [PATCH 47/79] Remove pipenv use from test runner --- test/scripts/H2CO/compare_results.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index 586c80e..a817197 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -9,10 +9,10 @@ folder2=$2 # Quantum states can be equivalent without being equal so cannot be directly compared quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_descr0_4.chk j0contr_descr.chk j0eigen_descr0_1.chk j0eigen_descr0_2.chk j0eigen_descr0_3.chk j0eigen_descr0_4.chk j0eigen_descr1_1.chk j0eigen_descr1_2.chk j0eigen_descr1_3.chk j0eigen_descr1_4.chk j0eigen_descr2_1.chk j0eigen_descr2_2.chk j0eigen_descr2_3.chk j0eigen_descr2_4.chk j0eigen_descr3_1.chk j0eigen_descr3_2.chk j0eigen_descr3_3.chk j0eigen_descr3_4.chk j0eigen_descr4_1.chk j0eigen_descr4_2.chk j0eigen_descr4_3.chk j0eigen_descr4_4.chk j0eigen_descr10_1.chk j0eigen_descr10_2.chk j0eigen_descr10_3.chk j0eigen_descr10_4.chk j0eigen_descr11_1.chk j0eigen_descr11_2.chk j0eigen_descr11_3.chk j0eigen_descr11_4.chk j0eigen_descr12_1.chk j0eigen_descr12_2.chk j0eigen_descr12_3.chk j0eigen_descr12_4.chk" -pipenv run python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files +python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -pipenv run python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk +python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk -pipenv run python compare_results.py --kind column --column 2 --precision 1e-8 --folder1 "$folder1" --folder2 "$folder2" potential.chk +python compare_results.py --kind column --column 2 --precision 1e-8 --folder1 "$folder1" --folder2 "$folder2" potential.chk -pipenv run python compare_results.py --kind column --column 4 --precision 1e-8 --folder1 "$folder1" --folder2 "$folder2" kinetic.chk +python compare_results.py --kind column --column 4 --precision 1e-8 --folder1 "$folder1" --folder2 "$folder2" kinetic.chk From 9cf070b499ee28385f04b8ad8085a36d7c99e40d Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 09:55:46 +0100 Subject: [PATCH 48/79] Fail earlier when searching log block --- test/compare_results.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 5638b1b..668c368 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -28,15 +28,16 @@ def find_log_block(lines, blockname): start_idx = i break + if start_idx is None: + raise Exception(f"{blockname} not found") + # find first "done" after we've found blockname for i, line in enumerate(lines[start_idx:]): if "done" in line: end_idx = i + start_idx break - if start_idx is not None: - raise Exception(f"{blockname} not found") - elif end_idx is not None: + if end_idx is None: raise Exception(f"Could not find end of {blockname}") return start_idx, end_idx From 8c0dc2c6b097cf6db748bad2d49d469189557ae7 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:12:08 +0100 Subject: [PATCH 49/79] Remove pointless comment --- test/compare_results.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index 668c368..f33f1ed 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -45,7 +45,6 @@ def find_log_block(lines, blockname): def read_chk_file(fname): """Read checkpoint file as a list of lines""" with open(fname, 'r') as fp: - # Strip newlines and lines with comments lines = fp.readlines() return lines From b8d1729ba4c4fc11b45da212fd1bb179795ce06f Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:13:57 +0100 Subject: [PATCH 50/79] When there are differences in outputs, print all differences then exit --- test/compare_results.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/compare_results.py b/test/compare_results.py index f33f1ed..0f88103 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -92,10 +92,13 @@ def read_intensity_column(fname, column_name): def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): """Compare two columns of numbers to a give absolute or relative precision""" + difference_exists = False for i, (e1, e2) in enumerate(zip(col1, col2)): if not math.isclose(e1, e2, abs_tol=abs_precision, rel_tol=rel_precision): + difference_exists = True print(f"{e1} and {e2} differ by {abs(e1-e2)} at index {i}") + assert difference_exists == False def compare_energy_files(fname1, fname2, column_no, precision=1e-10): """Compare two energy files""" From 2ff450c64f8c5db952ffac38f7fd2c2afa0ac5b4 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:16:23 +0100 Subject: [PATCH 51/79] Use assertion instead of exception for testing validity of intensity keys --- test/compare_results.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 0f88103..76fcb54 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -79,8 +79,7 @@ def read_quantum_energies(fname): def read_intensity_column(fname, column_name): """Extract quantum energies from fname""" - if column_name not in INTENSITY_INDICES.keys(): - raise Exception(f"Intensity column name must be one of {INTENSITY_INDICES.keys()}") + assert column_name in INTENSITY_INDICES.keys(), f"Intensity column name must be one of {INTENSITY_INDICES.keys()}") lines = read_chk_file(fname) lines = strip_newlines(lines) From 8cae86f6c96364fee917e6e70c126329f1b6818b Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:16:46 +0100 Subject: [PATCH 52/79] Add intensity as option in comparison --- test/compare_results.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index 76fcb54..9aa21c1 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -126,7 +126,7 @@ def main(): help='first folder to compare') parser.add_argument('--folder2', required=True, help='second folder to compare') - parser.add_argument('--kind', choices=['quantum', 'column'], required=True, help='type of file') + parser.add_argument('--kind', choices=['quantum', 'column', 'intensity'], required=True, help='type of file') parser.add_argument('--column', default=-1, type=int, help='column to compare when \'column\' is supplied to --kind (index starts at 0)') parser.add_argument('--precision', default=1e-10, type=float, help='relative precision of which two values must differ by to be considered nonequal') @@ -149,6 +149,9 @@ def main(): compare_quantum_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) except IndexError: print(folder1 + "/" + fname, "does not match regular quantum file format") + elif args.kind == 'intensity': + for fname in filelist: + compare_quantum_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) exit(0) From 15dc28524bcf4f4e499d1d50ca174cd9eaf831c5 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:21:38 +0100 Subject: [PATCH 53/79] Enable testing of intensity log file --- test/scripts/H2CO/compare_results.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index a817197..f23ecb8 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,6 +11,8 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files +python compare_results.py --kind intensity --folder1 "$folder1" --folder2 "$folder2" file_intensity.out + python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk python compare_results.py --kind column --column 2 --precision 1e-8 --folder1 "$folder1" --folder2 "$folder2" potential.chk From 98b3587e8e44396c01e2662dfdd52838a73ed496 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 10:54:38 +0100 Subject: [PATCH 54/79] Remove unused pipenv steps in CI --- .github/workflows/ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17c70e3..2a80e67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,9 +40,6 @@ jobs: - name: Install Intel MKL run: sudo apt-get install -y intel-mkl - - name: Install pipenv environment - run: cd test && pip install pipenv && pipenv install - - name: Build run: | gfortran --version From 1feb47045fe448fcadfe02b9eafd671c71b405bd Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:29:37 +0100 Subject: [PATCH 55/79] Comparing intensity outputs works down to 1e-6 --- test/scripts/H2CO/compare_results.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index f23ecb8..caef145 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,7 +11,7 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -python compare_results.py --kind intensity --folder1 "$folder1" --folder2 "$folder2" file_intensity.out +python compare_results.py --kind intensity --precision 1e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk From a2bb6d03ed1abe1ae11cee9fef0ac7df08263a4d Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:29:56 +0100 Subject: [PATCH 56/79] Comments --- test/compare_results.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index 9aa21c1..99bcaa0 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -90,7 +90,7 @@ def read_intensity_column(fname, column_name): return extract_column(lines, INTENSITY_INDICES[column_name]) def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): - """Compare two columns of numbers to a give absolute or relative precision""" + """Compare two columns of numbers to a given absolute or relative precision""" difference_exists = False for i, (e1, e2) in enumerate(zip(col1, col2)): if not math.isclose(e1, e2, abs_tol=abs_precision, rel_tol=rel_precision): @@ -109,6 +109,7 @@ def compare_quantum_files(fname1, fname2, precision=1e-10): """Compare two files in quantum form""" energies1 = read_quantum_energies(fname1) energies2 = read_quantum_energies(fname2) + # Note, this uses the more accurate rel_precision compare_columns(energies1, energies2, rel_precision=precision) def compare_intensity_files(fname1, fname2, precision=1e-10): From 44e88cef321d686684bd442d6038752288a99ca7 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:30:52 +0100 Subject: [PATCH 57/79] Fix intensity comparison and test using absolute difference --- test/compare_results.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 99bcaa0..b09b199 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -79,7 +79,7 @@ def read_quantum_energies(fname): def read_intensity_column(fname, column_name): """Extract quantum energies from fname""" - assert column_name in INTENSITY_INDICES.keys(), f"Intensity column name must be one of {INTENSITY_INDICES.keys()}") + assert column_name in INTENSITY_INDICES.keys(), f"Intensity column name must be one of {INTENSITY_INDICES.keys()}" lines = read_chk_file(fname) lines = strip_newlines(lines) @@ -117,7 +117,7 @@ def compare_intensity_files(fname1, fname2, precision=1e-10): for col_name in INTENSITY_INDICES.keys(): col1 = read_intensity_column(fname1, col_name) col2 = read_intensity_column(fname2, col_name) - compare_columns(col1, col2, rel_precision=precision) + compare_columns(col1, col2, abs_precision=precision) def main(): parser = argparse.ArgumentParser(description='Compare output files from TROVE') @@ -152,7 +152,7 @@ def main(): print(folder1 + "/" + fname, "does not match regular quantum file format") elif args.kind == 'intensity': for fname in filelist: - compare_quantum_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) + compare_intensity_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) exit(0) From 0d64c38ccc300d8b15e44efef774df69d6eafdc0 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:31:15 +0100 Subject: [PATCH 58/79] Don't strip comments when running quantum energy comparison --- test/compare_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index b09b199..3148767 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -71,7 +71,7 @@ def read_energy_column(fname, column_no): def read_quantum_energies(fname): """Extract quantum energies from fname""" lines = read_chk_file(fname) - lines = strip_newlines(strip_comments(lines)) + lines = strip_newlines(lines) start, end = find_start_end_block(lines, "Quantum") # take out first 4 lines and last line of block lines = lines[start+4:end] From b8d19a7f22ec567bdd23897a473df270c46cf896 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:31:49 +0100 Subject: [PATCH 59/79] Sort intensity numbers by relevant ID before comparison --- test/compare_results.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index 3148767..ee9fa36 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -84,10 +84,16 @@ def read_intensity_column(fname, column_name): lines = read_chk_file(fname) lines = strip_newlines(lines) start, end = find_log_block(lines, "Linestrength") - # take out first and last line of block - lines = lines[start+1:end] + # take out non-data lines + lines = lines[start+2:end] - return extract_column(lines, INTENSITY_INDICES[column_name]) + ids = [int(line.split()[0]) for line in lines] + data = extract_column(lines, INTENSITY_INDICES[column_name]) + + # Order of output can change so sort by element ID + sorted_data = [x for _, x in sorted(zip(ids, data))] + + return sorted_data def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): """Compare two columns of numbers to a given absolute or relative precision""" From 964ce8000722633fd7a68c7fe9126e662ed83edf Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:32:27 +0100 Subject: [PATCH 60/79] Implement error checking when finding block in chkpoint file --- test/compare_results.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/compare_results.py b/test/compare_results.py index ee9fa36..8e15019 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -13,16 +13,25 @@ def find_start_end_block(lines, blockname): """Identify start and end of blocks in .chk files""" + start_idx = end_idx = None + for i, line in enumerate(lines): if "Start " + blockname in line: start_idx = i elif "End " + blockname in line: end_idx = i + + if start_idx is None: + raise Exception(f"{blockname} not found") + if end_idx is None: + raise Exception(f"Could not find end of {blockname}") + return start_idx, end_idx def find_log_block(lines, blockname): """Identify start and end of block in log file""" start_idx = end_idx = None + for i, line in enumerate(lines): if blockname in line: start_idx = i From 4da59e1d983ed007926c3656c491f0c84e783d31 Mon Sep 17 00:00:00 2001 From: Jamie J Quinn Date: Wed, 2 Jun 2021 16:39:43 +0100 Subject: [PATCH 61/79] Update test/compare_results.py Co-authored-by: ageorgou <1186102+ageorgou@users.noreply.github.com> --- test/compare_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/compare_results.py b/test/compare_results.py index 8e15019..de79ae5 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -112,7 +112,7 @@ def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): difference_exists = True print(f"{e1} and {e2} differ by {abs(e1-e2)} at index {i}") - assert difference_exists == False + assert not difference_exists def compare_energy_files(fname1, fname2, column_no, precision=1e-10): """Compare two energy files""" From 11d012753bb921d02b1663688d5c092012de7f6a Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:41:09 +0100 Subject: [PATCH 62/79] Handle paths maturely --- test/compare_results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index de79ae5..aa5a656 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -162,12 +162,12 @@ def main(): elif args.kind == 'quantum': for fname in filelist: try: - compare_quantum_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) + compare_quantum_files(os.path.join(folder1, fname), os.path.join(folder2, fname), precision=args.precision) except IndexError: print(folder1 + "/" + fname, "does not match regular quantum file format") elif args.kind == 'intensity': for fname in filelist: - compare_intensity_files(folder1 + "/" + fname, folder2 + "/" + fname, precision=args.precision) + compare_intensity_files(os.path.join(folder1, fname), os.path.join(folder2, fname), precision=args.precision) exit(0) From 3d8d80a784dc50e8ec6184922dade7284df710bc Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 16:48:11 +0100 Subject: [PATCH 63/79] Cleanup makefile (fixes #33) --- makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index e3ac0bc..83bafaf 100644 --- a/makefile +++ b/makefile @@ -177,7 +177,7 @@ mol_c3h6.o: mol_c3h6.f90 accuracy.o moltype.o mol_ch3oh.o: mol_ch3oh.f90 accuracy.o moltype.o lapack.o pot_ch3oh.o mol_ch4.o: mol_ch4.f90 accuracy.o moltype.o lapack.o symmetry.o molecules.o: molecules.f90 accuracy.o lapack.o moltype.o mol_xy.o mol_xy2.o mol_xy3.o mol_xy4.o mol_zxy2.o mol_zxy3.o mol_ch3oh.o mol_abcd.o mol_c2h4.o mol_c2h6.o mol_c3h6.o pot_xy2.o pot_xy3.o pot_abcd.o pot_zxy2.o pot_zxy3.o pot_xy4.o pot_ch3oh.o pot_c2h4.o pot_c2h6.o pot_c3h6.o prop_xy2.o prop_xy2_quad.o prop_xy2_spinrot.o prop_xy2_spinspin.o kin_xy2.o symmetry.o $(pot_user).o -moltype.o: moltype.f90 accuracy.o lapack.o accuracy.o accuracy.o accuracy.o accuracy.o accuracy.o accuracy.o accuracy.o accuracy.o +moltype.o: moltype.f90 accuracy.o lapack.o mol_user.o: mol_user.f90 accuracy.o moltype.o lapack.o symmetry.o mol_xy2.o: mol_xy2.f90 accuracy.o moltype.o symmetry.o mol_xy3.o: mol_xy3.f90 accuracy.o moltype.o lapack.o @@ -186,7 +186,7 @@ mol_xy.o: mol_xy.f90 accuracy.o moltype.o mol_zxy2.o: mol_zxy2.f90 accuracy.o moltype.o mol_zxy3.o: mol_zxy3.f90 accuracy.o moltype.o lapack.o mpi_aux.o: mpi_aux.f90 accuracy.o timer.o -perturbation.o: perturbation.f90 accuracy.o molecules.o moltype.o lapack.o plasma.o fields.o timer.o symmetry.o me_numer.o diag.o +perturbation.o: perturbation.f90 accuracy.o molecules.o moltype.o lapack.o plasma.o fields.o timer.o symmetry.o me_numer.o diag.o mpi_aux.o plasma.o: plasma.f90 accuracy.o timer.o pot_abcd.o: pot_abcd.f90 accuracy.o moltype.o lapack.o pot_c2h4.o: pot_c2h4.f90 accuracy.o moltype.o @@ -207,5 +207,5 @@ richmol_data.o: richmol_data.f90 accuracy.o timer.o rotme_cart_tens.o: rotme_cart_tens.f90 accuracy.o timer.o fwigxjpf.o moltype.o accuracy.o symmetry.o: symmetry.f90 accuracy.o timer.o timer.o: timer.f90 accuracy.o -tran.o: tran.f90 accuracy.o timer.o me_numer.o molecules.o fields.o moltype.o symmetry.o perturbation.o +tran.o: tran.f90 accuracy.o timer.o me_numer.o molecules.o fields.o moltype.o symmetry.o perturbation.o mpi_aux.o trove.o: trove.f90 accuracy.o fields.o perturbation.o symmetry.o timer.o moltype.o dipole.o refinement.o tran.o extfield.o From 042127b0a068baf214f78f2c47b851e0de9040e8 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 17:12:13 +0100 Subject: [PATCH 64/79] Improve error handling in comparison script; it's now much easier to tell which files are differing and where --- test/compare_results.py | 35 ++++++++++++++++++++-------- test/scripts/H2CO/compare_results.sh | 2 +- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/test/compare_results.py b/test/compare_results.py index aa5a656..2a615fe 100644 --- a/test/compare_results.py +++ b/test/compare_results.py @@ -11,6 +11,9 @@ "nu": 3, } +class BlockNotFound(Exception): + pass + def find_start_end_block(lines, blockname): """Identify start and end of blocks in .chk files""" start_idx = end_idx = None @@ -22,9 +25,9 @@ def find_start_end_block(lines, blockname): end_idx = i if start_idx is None: - raise Exception(f"{blockname} not found") + raise BlockNotFound(f"{blockname} not found") if end_idx is None: - raise Exception(f"Could not find end of {blockname}") + raise BlockNotFound(f"Could not find end of {blockname}") return start_idx, end_idx @@ -38,7 +41,7 @@ def find_log_block(lines, blockname): break if start_idx is None: - raise Exception(f"{blockname} not found") + raise BlockNotFound(f"{blockname} not found") # find first "done" after we've found blockname for i, line in enumerate(lines[start_idx:]): @@ -47,7 +50,7 @@ def find_log_block(lines, blockname): break if end_idx is None: - raise Exception(f"Could not find end of {blockname}") + raise BlockNotFound(f"Could not find end of {blockname}") return start_idx, end_idx @@ -81,7 +84,10 @@ def read_quantum_energies(fname): """Extract quantum energies from fname""" lines = read_chk_file(fname) lines = strip_newlines(lines) - start, end = find_start_end_block(lines, "Quantum") + try: + start, end = find_start_end_block(lines, "Quantum") + except BlockNotFound as msg: + print(f"ERROR: Quantum block not found in file: {fname}") # take out first 4 lines and last line of block lines = lines[start+4:end] return extract_column(lines, QUANTUM_ENERGY_IDX) @@ -92,7 +98,10 @@ def read_intensity_column(fname, column_name): lines = read_chk_file(fname) lines = strip_newlines(lines) - start, end = find_log_block(lines, "Linestrength") + try: + start, end = find_log_block(lines, "Linestrength") + except BlockNotFound as msg: + print(f"ERROR: Linestrength block not found in file: {fname}") # take out non-data lines lines = lines[start+2:end] @@ -112,27 +121,33 @@ def compare_columns(col1, col2, abs_precision=0.0, rel_precision=1e-10): difference_exists = True print(f"{e1} and {e2} differ by {abs(e1-e2)} at index {i}") - assert not difference_exists + return difference_exists def compare_energy_files(fname1, fname2, column_no, precision=1e-10): """Compare two energy files""" energies1 = read_energy_column(fname1, column_no) energies2 = read_energy_column(fname2, column_no) - compare_columns(energies1, energies2, abs_precision=precision) + if compare_columns(energies1, energies2, abs_precision=precision): + print(f"ERROR: Differences exist between {fname1} and {fname2}") + exit(-1) def compare_quantum_files(fname1, fname2, precision=1e-10): """Compare two files in quantum form""" energies1 = read_quantum_energies(fname1) energies2 = read_quantum_energies(fname2) # Note, this uses the more accurate rel_precision - compare_columns(energies1, energies2, rel_precision=precision) + if compare_columns(energies1, energies2, rel_precision=precision): + print(f"ERROR: Differences exist between {fname1} and {fname2}") + exit(-1) def compare_intensity_files(fname1, fname2, precision=1e-10): """Compare two files in quantum form""" for col_name in INTENSITY_INDICES.keys(): col1 = read_intensity_column(fname1, col_name) col2 = read_intensity_column(fname2, col_name) - compare_columns(col1, col2, abs_precision=precision) + if compare_columns(col1, col2, abs_precision=precision): + print(f"ERROR: Differences exist in column {col_name} between {fname1} and {fname2}") + exit(-1) def main(): parser = argparse.ArgumentParser(description='Compare output files from TROVE') diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index caef145..a59b027 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,7 +11,7 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -python compare_results.py --kind intensity --precision 1e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out +python compare_results.py --kind intensity --precision 5e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk From e2e352b67de2ad59bebf100ebe26f3c1ae7104c1 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 17:29:48 +0100 Subject: [PATCH 65/79] TROVE built with gfortran -O0 segfaults; change to -O2 for continuous integration --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 869f04a..b87df31 100644 --- a/makefile +++ b/makefile @@ -60,7 +60,7 @@ else ifeq ($(strip $(COMPILER)),gfortran) ifeq ($(strip $(MODE)),debug) FFLAGS += -O0 -g -Wall -Wextra -fbacktrace else ifeq ($(strip $(MODE)),ci) - FFLAGS += -O0 -g + FFLAGS += -O2 -g else FFLAGS += -O3 endif From 1250f5292572c9ec4c226f1c98887f8301cbc0f3 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 2 Jun 2021 17:50:46 +0100 Subject: [PATCH 66/79] Incorporate MPI options into makefile --- makefile | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/makefile b/makefile index b87df31..d2fd646 100644 --- a/makefile +++ b/makefile @@ -12,28 +12,13 @@ pot_user = pot_H2O_Conway COMPILER ?= intel MODE ?= release - -# === MPI - -#FOR = mpif90 -##FFLAGS = -qopenmp -xHost -O3 -ip -g3 -traceback -DTROVE_USE_MPI_ -#FFLAGS = -fopenmp -ffree-line-length-512 -march=native -O0 -fcray-pointer -g -fallow-argument-mismatch -fbacktrace -DTROVE_USE_MPI_ - -##LAPACK = -mkl=parallel -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 -#LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_blacs_intelmpi_lp64 -lmkl_scalapack_lp64 -lmkl_core -lgomp -lpthread -lm -ldl - -#ARPACK = -larpack - -#LIB = $(LAPACK) $(ARPACK) - -# === MPI +USE_MPI ?= # Intel ####### ifeq ($(strip $(COMPILER)),intel) FOR = ifort - FFLAGS = -cpp -qopenmp -module $(OBJDIR) - LAPACK = -mkl + FFLAGS = -cpp -ip -align -ansi-alias -traceback -mcmodel=medium -parallel -nostandard-realloc-lhs -qopenmp -module $(OBJDIR) ifeq ($(strip $(MODE)),debug) FFLAGS += -O0 -g @@ -41,9 +26,10 @@ ifeq ($(strip $(COMPILER)),intel) FFLAGS += -O3 endif - # Alternative flags: - #FFLAGS = -ip -align -ansi-alias -traceback -qopenmp -mcmodel=medium -parallel -nostandard-realloc-lhs -module $(OBJDIR) - #LAPACK = -mkl=parallel + LAPACK = -mkl=parallel + ifdef USE_MPI + LAPACK += -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 + endif # gfortran ########## @@ -53,7 +39,7 @@ else ifeq ($(strip $(COMPILER)),gfortran) GCC_VERSION_GT_10 := $(shell expr `gcc -dumpversion | cut -f1 -d.` \>= 10) ifeq "${GCC_VERSION_GT_10}" "1" - # gcc 10 complains about mismatched argument types + # gcc 10+ complains about mismatched argument types FFLAGS += -fallow-argument-mismatch endif @@ -65,24 +51,29 @@ else ifeq ($(strip $(COMPILER)),gfortran) FFLAGS += -O3 endif - # Use non-MKL LAPACK: - #LAPACK = -llapack -lblas - - # Use MKL LAPACK: - LAPACK += -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl + LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl + ifdef USE_MPI + LAPACK += -lmkl_blacs_intelmpi_lp64 -lmkl_scalapack_lp64 + endif else $(error Compiler option "$(COMPILER)" not defined.) endif CPPFLAGS = -D_EXTFIELD_DEBUG_ +ifdef USE_MPI + FOR = mpif90 + FFLAGS += -DTROVE_USE_MPI_ + ARPACK = -larpack +endif + ################################################################################ ## LIBRARIES ################################################################################ WIGXJPF_DIR = wigxjpf-1.5 WIGXJPF_LIB = $(WIGXJPF_DIR)/lib/libwigxjpf.a -LIB = $(LAPACK) $(LIBS) $(WIGXJPF_LIB) +LIB = $(LAPACK) $(LIBS) $(WIGXJPF_LIB) $(ARPACK) ################################################################################ ## SOURCES & DIRECTORIES From 4096f62b25ab3e905c021f15b3800b9e38832033 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 3 Jun 2021 14:58:49 +0100 Subject: [PATCH 67/79] Move traceback flag to debug options --- makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index d2fd646..d333d9f 100644 --- a/makefile +++ b/makefile @@ -18,10 +18,10 @@ USE_MPI ?= ####### ifeq ($(strip $(COMPILER)),intel) FOR = ifort - FFLAGS = -cpp -ip -align -ansi-alias -traceback -mcmodel=medium -parallel -nostandard-realloc-lhs -qopenmp -module $(OBJDIR) + FFLAGS = -cpp -ip -align -ansi-alias -mcmodel=medium -parallel -nostandard-realloc-lhs -qopenmp -module $(OBJDIR) ifeq ($(strip $(MODE)),debug) - FFLAGS += -O0 -g + FFLAGS += -O0 -g -traceback else FFLAGS += -O3 endif @@ -64,7 +64,6 @@ CPPFLAGS = -D_EXTFIELD_DEBUG_ ifdef USE_MPI FOR = mpif90 FFLAGS += -DTROVE_USE_MPI_ - ARPACK = -larpack endif ################################################################################ From 2652c80d80a226eb1ce8aea05dd627ea7e7d9579 Mon Sep 17 00:00:00 2001 From: ageorgou <1186102+ageorgou@users.noreply.github.com> Date: Tue, 29 Jun 2021 17:01:28 +0100 Subject: [PATCH 68/79] Include Intel compiler and MPI in CI (#36) * Start running with Intel compiler * Use correct compiler command I don't like this but it seems there's no way to conditionally set the environment variable. See also https://github.com/actions/runner/issues/409 Will possibly change to conditional steps later. * Remember Intel-related parameters The scripts set the PATH (and maybe other variables?) but those are not preserved across steps. Since the build step is becoming more distinct between the two compilers, let's try splitting it in two conditional ones. * Try caching Intel installation * Look for Intel libraries before tests * Try to cache whole directory * Allow running tests with MPI * Fix syntax * Install MPI compilers when needed * Try to force use of ifort with MPI By default, the Intel version of mpif90 uses gfortran. We can either switch to using mpiifort instead, or try to control the underlying compiler this way. See also, for example: https://www.hpc.cineca.it/center_news/important-use-intel-mpi-wrappers-mpif90-mpicc-mpicxx * Make sure cache names don't clash with/without MPI * Update variables so MKL and BLACS can be found * Fix MKL installation * Show some more info and build faster on CI * Only use one MPI process on CI for debugging See if the error still happens when testing. * Avoid reading from stdin * Build faster on CI with gfortran * Avoid segmentation fault with quick gfortran build This reverts commit b8413c6593b4a1b7344239801683ccb5ce499539. * Replicate execution on cluster temporarily Adding the mpiio option may be required for now but will eventually be removed. * Don't test file_intensity with MPI It's currently failing when run with MPI and > 1 processes (not just on GitHub Actions, also on CSD3). * Simplify USE_MPI in Makefile and CI Now behaving similar to the other Makefile variables. This also lets us make the GitHub Actions job a bit simpler. * Don't install MKL twice when using MPI, clean up --- .github/workflows/ci.yml | 72 ++++++++++++++++++++++++---- makefile | 10 ++-- test/run_regression_tests.sh | 4 +- test/scripts/H2CO/compare_results.sh | 4 +- test/scripts/H2CO/run_benchmark.sh | 21 ++++++-- test/scripts/set_io_format.sh | 22 +++++++++ 6 files changed, 113 insertions(+), 20 deletions(-) create mode 100755 test/scripts/set_io_format.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a80e67..4c6f29e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,10 @@ on: jobs: test: - name: TROVE ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.compiler }} + name: TROVE ${{ matrix.os }} - ${{ matrix.arch }} - ${{ matrix.compiler }} - MPI:${{ matrix.mpi }} env: OMP_NUM_THREADS: 1 + USE_MPI: ${{ matrix.mpi && 1 || 0 }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -20,16 +21,40 @@ jobs: - x64 compiler: - gfortran + - intel + mpi: + - true + - false + exclude: + # only use MPI with intel + - compiler: gfortran + mpi: true steps: - uses: actions/checkout@v2 - #- name: Cache benchmark data - #uses: actions/cache@v2 - #env: - #cache-name: benchmark-data - #with: - #path: test/benchmarks - #key: benchmark-data + - name: Restored cached Intel install + id: cache_intel + if: ${{ matrix.compiler == 'intel' }} + uses: actions/cache@v2 + with: + key: oneapi-${{ matrix.os }}-${{ matrix.arch }}-MPI:${{ matrix.mpi }} + path: | + /opt/intel/oneapi/* + !/opt/intel/oneapi/conda_channel + !/opt/intel/oneapi/debugger + !/opt/intel/oneapi/licensing + + - name: Setup Intel compiler and MPI + if: ${{ matrix.compiler == 'intel' && steps.cache_intel.outputs.cache-hit != 'true'}} + run: | + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt-get update -o Dir::Etc::sourcelist="sources.list.d/oneAPI.list" -o APT::Get::List-Cleanup="0" + sudo apt-get install -y intel-oneapi-compiler-fortran + if [ ${USE_MPI} = 1 ]; then + sudo apt-get install -y intel-oneapi-mpi-devel intel-oneapi-mkl-devel + fi - name: Setup python uses: actions/setup-python@v1 @@ -38,12 +63,39 @@ jobs: architecture: x64 - name: Install Intel MKL + # MKL will have been installed alongside MPI if we're using that + if: ${{ !matrix.mpi }} run: sudo apt-get install -y intel-mkl - - name: Build + - name: Build (gfortran) + if: ${{ matrix.compiler == 'gfortran' }} run: | gfortran --version make COMPILER=gfortran MODE=ci + - name: Build (intel) + if: ${{ matrix.compiler == 'intel' }} + run: | + source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/setvars.sh + if [ -d "/opt/intel/oneapi/mkl" ]; then + source /opt/intel/oneapi/mkl/latest/env/vars.sh + fi + # mpif90 uses gfortran by default unless changed like this + export I_MPI_F90=ifort + ifort --version + make COMPILER=intel MODE=ci USE_MPI=${USE_MPI} + - name: Test - run: make test + env: + # nproc is used in the test script for the number of processes + nproc: ${{ matrix.mpi && 2 || 1 }} + run: | + if [[ -d "/opt/intel/oneapi" ]]; then + source /opt/intel/oneapi/compiler/latest/env/vars.sh + source /opt/intel/oneapi/setvars.sh + if [ ${USE_MPI} = 1 ]; then + source /opt/intel/oneapi/mkl/latest/env/vars.sh + fi + fi + make test diff --git a/makefile b/makefile index d333d9f..6a4c350 100644 --- a/makefile +++ b/makefile @@ -12,7 +12,7 @@ pot_user = pot_H2O_Conway COMPILER ?= intel MODE ?= release -USE_MPI ?= +USE_MPI ?= 0 # Intel ####### @@ -22,12 +22,14 @@ ifeq ($(strip $(COMPILER)),intel) ifeq ($(strip $(MODE)),debug) FFLAGS += -O0 -g -traceback + else ifeq ($(strip $(MODE)),ci) + FFLAGS += -O0 -g else FFLAGS += -O3 endif LAPACK = -mkl=parallel - ifdef USE_MPI + ifneq ($(strip $(USE_MPI)),0) LAPACK += -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64 endif @@ -52,7 +54,7 @@ else ifeq ($(strip $(COMPILER)),gfortran) endif LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl - ifdef USE_MPI + ifneq ($(strip $(USE_MPI)),0) LAPACK += -lmkl_blacs_intelmpi_lp64 -lmkl_scalapack_lp64 endif else @@ -61,7 +63,7 @@ endif CPPFLAGS = -D_EXTFIELD_DEBUG_ -ifdef USE_MPI +ifneq ($(strip $(USE_MPI)),0) FOR = mpif90 FFLAGS += -DTROVE_USE_MPI_ endif diff --git a/test/run_regression_tests.sh b/test/run_regression_tests.sh index 53b0728..2fe003a 100755 --- a/test/run_regression_tests.sh +++ b/test/run_regression_tests.sh @@ -4,7 +4,8 @@ set -e exe_name=j-trove.x exe=../$exe_name -nproc=1 +# Use 1 process unless we have specified differently (e.g. in CI) +nproc=${nproc:-1} # Check exe is present if [ ! -f $exe ]; then @@ -27,6 +28,7 @@ for benchmark in H2CO; do cp $exe $wd cp benchmarks/$benchmark/input/*.inp $wd cp scripts/$benchmark/run_benchmark.sh $wd + cp scripts/set_io_format.sh $wd # Run benchmark pushd $wd diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index a59b027..78011b1 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,7 +11,9 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -python compare_results.py --kind intensity --precision 5e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out +if [ ${USE_MPI} != 1 ]; then + python compare_results.py --kind intensity --precision 5e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out +fi python compare_results.py --kind column --column 3 --precision 5e-3 --folder1 "$folder1" --folder2 "$folder2" external.chk diff --git a/test/scripts/H2CO/run_benchmark.sh b/test/scripts/H2CO/run_benchmark.sh index 644e8a2..c3176d7 100755 --- a/test/scripts/H2CO/run_benchmark.sh +++ b/test/scripts/H2CO/run_benchmark.sh @@ -14,13 +14,26 @@ export OMP_NUM_THREADS=$nproc # Ensure stacksize unlimited (for fortran) ulimit -d unlimited -LAUNCH="time" +if [ ${USE_MPI} = 1 ]; then + LAUNCH="time mpirun -ppn 1 -np $nproc" + ./set_io_format.sh enable + echo "Will run with MPI" +else + LAUNCH="time" + echo "Will run without MPI" +fi echo "Time: `date`" echo "Current directory: `pwd`" - -for name in file{1..12} file_intensity; do - $LAUNCH ./$exe < $name.inp > $name.out +echo "Using ${nproc} process(es)" + +files_to_check=(file{1..12}) +if [ ${USE_MPI} != 1 ]; then + # The intensity file does not work with MPI at the moment + files_to_check+=(file_intensity) +fi +for name in ${files_to_check[@]}; do + $LAUNCH ./$exe $name.inp -o $name > $name.out done echo "DONE" diff --git a/test/scripts/set_io_format.sh b/test/scripts/set_io_format.sh new file mode 100755 index 0000000..eeff9d2 --- /dev/null +++ b/test/scripts/set_io_format.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +files=$(ls *.inp) +option=$1 + +if [[ $option == "enable" ]]; then + echo "enable" + for f in $files; do + if ! grep -qi MPIIO $f; then + sed -i '/eigenfunc/ a format MPIIO' $f + fi + done +elif [[ $option == "disable" ]]; then + echo "disable" + for f in $files; do + if grep -qi MPIIO $f; then + sed -i '/MPIIO/d' $f + fi + done +else + echo "options are enable or disable" +fi From 9cdb17f47f4d413a126892ba3cfcb7cd265cf065 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 5 Aug 2021 11:50:36 +0100 Subject: [PATCH 69/79] Properly copy array, don't just re-reference (fixes double free error) --- fields.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fields.f90 b/fields.f90 index 9749f35..9ce2268 100644 --- a/fields.f90 +++ b/fields.f90 @@ -6662,7 +6662,7 @@ subroutine Lmat_generation1d ! bm = 0.0_ark ; bm(Nequat-Nmodes+imode) = 1.0_ark ! - a = Tmat + a(:,:) = Tmat(:,:) b(:,1) = bm(:) ! call MLlinurark(Nequat,a,b(:,1),bm,ierror) From ec16a553149d28ee75839ba09662d0319bf83edc Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 5 Aug 2021 12:20:43 +0100 Subject: [PATCH 70/79] Fix array bounds error --- fields.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fields.f90 b/fields.f90 index 9ce2268..4ccbfa4 100644 --- a/fields.f90 +++ b/fields.f90 @@ -4391,9 +4391,11 @@ subroutine FLReadInput(NPTorder,Npolyads,Natoms,Nmodes,Jrot) ! endif ! - if (trim(trove%symmetry)=='C2VN'.and.sym%N Date: Thu, 5 Aug 2021 13:03:38 +0100 Subject: [PATCH 71/79] Fix more array bounds errors --- fields.f90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fields.f90 b/fields.f90 index 4ccbfa4..f083d33 100644 --- a/fields.f90 +++ b/fields.f90 @@ -6527,7 +6527,7 @@ subroutine Lmat_generation1d ! kindex = 0 ; kindex(imode) = 1 ! - coordtransform(:,imode) = FLvect_finitediffs(job_is,trove%Ncoords,kindex,q_eq,step,irho) + coordtransform(:,imode) = FLvect_finitediffs(job_is,trove%Nmodes,kindex,q_eq,step,irho) ! !chi2 = MLcoordinate_transform_func(q2,size(chi2),dir) ! @@ -15778,12 +15778,12 @@ subroutine FLCompact_a_field_sparse(fl,name) ! Create a field in a sparse representaion ! if (associated(fl%IndexQ)) then - deallocate(fl%IndexQ) call ArrayMinus(name//'IndexQ',isize=size(fl%IndexQ),ikind=kind(fl%IndexQ)) + deallocate(fl%IndexQ) endif ! - deallocate(fl%ifromsparse) call ArrayMinus(name//'ifromsparse',isize=size(fl%ifromsparse),ikind=kind(fl%ifromsparse)) + deallocate(fl%ifromsparse) ! if (Nterms==0) then ! @@ -15959,9 +15959,9 @@ subroutine FLCompact_and_combine_fields_sparse(fl1,name1,fl2,name2) call ArrayStart(name1,alloc,size(fl1%iorder),kind(fl1%iorder)) call ArrayStart(name2,alloc,size(fl2%iorder),kind(fl2%iorder)) ! - deallocate(fl1%field,fl2%field,stat=alloc) call ArrayMinus(name1,isize=size(fl1%field),ikind=kind(fl1%field)) call ArrayMinus(name2,isize=size(fl2%field),ikind=kind(fl2%field)) + deallocate(fl1%field,fl2%field,stat=alloc) ! allocate(fl1%field(nterms,0:Npoints),fl2%field(nterms,0:Npoints),stat=alloc) call ArrayStart(name1,alloc,size(fl1%field),kind(fl1%field)) @@ -22626,7 +22626,7 @@ function poten_chi(chi) result (f) ! call FLfromcartesian2local(r_na,r_) dir = .true. - chi_ = MLcoordinate_transform_func(r_,size(r_),dir) + chi_ = MLcoordinate_transform_func(r_,size(chi_),dir) ! do i = 1,trove%Nmodes ! From a1d90abe97f1f25b6eeee39556c886035a80b6e4 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Thu, 5 Aug 2021 15:33:37 +0100 Subject: [PATCH 72/79] Comment out segfaulting deallocation --- perturbation.f90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/perturbation.f90 b/perturbation.f90 index 8a20d54..0204282 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -8509,11 +8509,11 @@ subroutine PThamiltonian_contract(jrot) endif ! ! free more memory - do ielem =1,size(contr(0)%rot(:)) - if (associated(contr(0)%rot(ielem)%coeff3d)) deallocate(contr(0)%rot(ielem)%coeff3d) - enddo - call ArrayStart('contr(0)-rot-coeff3d',0,1,4) - call ArrayStop('contr(0)-rot-coeff3d') + !do ielem =1,size(contr(0)%rot(:)) + !if (associated(contr(0)%rot(ielem)%coeff3d)) deallocate(contr(0)%rot(ielem)%coeff3d) + !enddo + !call ArrayStart('contr(0)-rot-coeff3d',0,1,4) + !call ArrayStop('contr(0)-rot-coeff3d') ! deallocate(ijterm,k_row) ! From 991ef2aaea713883d421d197934db0ca60aea019 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Mon, 9 Aug 2021 10:10:05 +0100 Subject: [PATCH 73/79] Refactor symm_mat_element_vector Previously there were two versions of this function, `symm_mat_element_vector` and `symm_mat_element_vector_k`, each dealing with a different symmetry in the molecule. These have been refactored into one function. This means molecules with euler symmetry can be processed with the MPI version of TROVE (at least for e.g. file1 of the CH4 benchmark). --- perturbation.f90 | 384 ++++++++++++++++++++--------------------------- 1 file changed, 164 insertions(+), 220 deletions(-) diff --git a/perturbation.f90 b/perturbation.f90 index 0204282..127dc2e 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -7737,15 +7737,7 @@ subroutine PThamiltonian_contract(jrot) ! ! ithread = omp_get_thread_num() ! - if (job%rotsym_do) then - ! - call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_contr_gamma,mat_t) - ! - else - ! - call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr,mat_t,no_diagonalization) - ! - endif + call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_contr,mat_t,no_diagonalization) ! do isym = 1,sym%Nrepresen ! @@ -7942,17 +7934,7 @@ subroutine PThamiltonian_contract(jrot) ! ! ithread = omp_get_thread_num() ! - !call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_grot,mat_t) - ! - if (job%rotsym_do) then - ! - call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_symrot_contr_grot,mat_t) - ! - else - ! - call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_grot,mat_t,no_diagonalization) - ! - endif + call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_contr_grot,mat_t,no_diagonalization) ! do isym = 1,sym%Nrepresen ! @@ -8014,17 +7996,7 @@ subroutine PThamiltonian_contract(jrot) ! ithread = 1 ! ithread = omp_get_thread_num() ! - !call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_gcor,mat_t) - ! - if (job%rotsym_do) then - ! - call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_symrot_contr_gcor,mat_t) - ! - else - ! - call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_gcor,mat_t) - ! - endif + call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_contr_gcor,mat_t) ! do isym = 1,sym%Nrepresen ! @@ -8091,17 +8063,7 @@ subroutine PThamiltonian_contract(jrot) ! ithread = 1 ! ithread = omp_get_thread_num() ! - !call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_hvib,mat_t) - ! - if (job%rotsym_do) then - ! - call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_symrot_contr_hvib,mat_t) - ! - else - ! - call symm_mat_element_vector_k(jrot,irow,ijterm,PTmatrixelements_contr_hvib,mat_t) - ! - endif + call symm_mat_element_vector(jrot,irow,ijterm,PTmatrixelements_contr_hvib,mat_t) ! do isym = 1,sym%Nrepresen ! @@ -9850,7 +9812,7 @@ end subroutine PTrestore_rot_kinetic_matrix_elements ! We construct the Hamiltonian matrix in symm. adapted representaion ! for the K-factorized rotational basis ! - recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_diagonalization) + subroutine symm_mat_element_vector(jrot,irow,ijterm,func,mat_t,no_diagonalization) use mpi_aux ! integer(ik),intent(in) :: jrot,irow,ijterm(:,:) @@ -9861,79 +9823,63 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di integer(ik) :: cnu_i(0:PT%Nclasses),cnu_j(0:PT%Nclasses) integer(ik) :: deg_i(0:PT%Nclasses),deg_j(0:PT%Nclasses) real(rk) :: mat_elem - integer(ik) :: isize,jsize,ielem,jelem,k_i,k_j,tau_i,tau_j + integer(ik) :: isize,jsize,ielem,jelem, k_i,k_j,tau_i,tau_j integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) ! real(rk), dimension(:,:,:), allocatable :: hcontr ! - !call TimerStart('Symmetrized Hamiltonian - one column') - ! mat_t = 0 ! cnu_i(:) = PT%contractive_space(:,irow) ! isize = PT%Index_deg(irow)%size1 ! - ! AT: hcontr is now an array of irow * PT%max_deg_size^2. This way we can calculate all hcontr values in advance, + ! AT: hcontr is an array of irow * PT%max_deg_size^2. This way we can calculate all hcontr values in advance, ! collect them to root, then run the matelem calculation loop. ! The reduction to root is necessary as we are doing apparently random access over a distributed matrix. allocate(hcontr(PT%max_deg_size,PT%max_deg_size,irow)) + hcontr = 0.0 ! do jrow = 1,irow - ! if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle - ! + cnu_j(:) = PT%contractive_space(:,jrow) - ! jsize = PT%Index_deg(jrow)%size1 - ! + do ideg = 1,isize - ! deg_i(:) = PT%Index_deg(irow)%icoeffs(:,ideg) - ! do jdeg = 1,jsize - !hcontr(ideg,jdeg) = 0.0_rk - ! deg_j(:) = PT%Index_deg(jrow)%icoeffs(:,jdeg) - ! - !iroot = contr(iclass)%iroot(cnu_i(iclass),deg_i(iclass)) - !jroot = contr(iclass)%iroot(cnu_j(iclass),deg_j(iclass)) - ! + icontr = PT%icase2icontr(irow,ideg) jcontr = PT%icase2icontr(jrow,jdeg) - k_i = PT%rot_index(cnu_i(0),deg_i(0))%k - k_j = PT%rot_index(cnu_j(0),deg_j(0))%k - tau_i = PT%rot_index(cnu_i(0),deg_i(0))%tau - tau_j = PT%rot_index(cnu_j(0),deg_j(0))%tau - ! - ! Matrix elements - ! + if (jcontr .lt. co_startdim .or. jcontr .gt. co_enddim) then hcontr(ideg,jdeg,jrow) = 0.0_rk else - hcontr(ideg,jdeg,jrow) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) + if (job%rotsym_do) then + hcontr(ideg,jdeg,jrow) = func(icontr,jcontr,jrot,cnu_i(0),cnu_j(0),deg_i(0),deg_j(0)) + else + k_i = PT%rot_index(cnu_i(0),deg_i(0))%k + k_j = PT%rot_index(cnu_j(0),deg_j(0))%k + tau_i = PT%rot_index(cnu_i(0),deg_i(0))%tau + tau_j = PT%rot_index(cnu_j(0),deg_j(0))%tau + hcontr(ideg,jdeg,jrow) = func(icontr,jcontr,jrot,k_i,k_j,tau_i,tau_j) + endif endif - - ! enddo - ! enddo end do ! ! Collect all pre-calculated hcontr values to MPI root. Non-local values have been initialised to 0 so it's safe to just do - ! MPI_SUM. call co_sum(hcontr, 0) - !if (mpi_rank .eq. 0) then - ! call mpi_reduce(mpi_in_place, hcontr, size(hcontr), mpi_double_precision, mpi_sum, 0, mpi_comm_world) - !else - ! call mpi_reduce(hcontr, hcontr, size(hcontr), mpi_double_precision, mpi_sum, 0, mpi_comm_world) - !end if ! ! We could do an allreduce above then distribute this loop, but all subsequent calculation is serialised so far. ! TODO future work? if (mpi_rank .eq. 0) then do jrow=1,irow + jsize = PT%Index_deg(jrow)%size1 do isym = 1,sym%Nrepresen ! iterm = ijterm(irow,isym) @@ -9975,7 +9921,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di PT%symactive_space(isym)%sym_N(iterm+ielem,1),irow write(out,"(' or PT%symactive_space(isym)%sym_N(2)/=ielem ',2i8)") & PT%symactive_space(isym)%sym_N(iterm+ielem,2),ielem - stop 'symm_mat_element_vector_k: something wrong with sym-counting' + stop 'symm_mat_element_vector: something wrong with sym-counting' endif endif ! @@ -10006,7 +9952,7 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! ! special case for linear molecules and E-symmetries. Not an ideal solution! if (trove%lincoord==0.or.all( (/isym,jsym/)<=4 ) ) then - stop 'symm_mat_element_vector_k: non-zero element between two symmetries' + stop 'symm_mat_element_vector: non-zero element between two symmetries' endif endif endif @@ -10025,155 +9971,153 @@ recursive subroutine symm_mat_element_vector_k(jrot,irow,ijterm,func,mat_t,no_di ! !call TimerStop('Symmetrized Hamiltonian - one column') ! - end subroutine symm_mat_element_vector_k - - + end subroutine symm_mat_element_vector ! ! In this version of the routine we construct the Hamiltonian matrix in symm. adapted representaion ! for rotational basis which is not factorized with "K". ! - recursive subroutine symm_mat_element_vector(jrot,irow,ijterm,func,mat_t) + !subroutine symm_mat_element_vector(jrot,irow,ijterm,func,mat_t,no_diagonalization) - integer(ik),intent(in) :: jrot,irow,ijterm(:,:) - real(rk),external :: func - real(rk),intent(out) :: mat_t(:,:,:) - ! - integer(ik) :: cnu_i(0:PT%Nclasses),cnu_j(0:PT%Nclasses) - integer(ik) :: deg_i(0:PT%Nclasses),deg_j(0:PT%Nclasses) - real(rk) :: mat_elem - integer(ik) :: isize,jsize,ielem,jelem - integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr - real(rk) :: hcontr(PT%max_deg_size,PT%max_deg_size) - real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) - ! - !call TimerStart('Symmetrized Hamiltonian - one column') - ! - mat_t = 0 - ! - cnu_i(:) = PT%contractive_space(:,irow) - ! - isize = PT%Index_deg(irow)%size1 - ! - do jrow = 1,irow - ! - cnu_j(:) = PT%contractive_space(:,jrow) - ! - jsize = PT%Index_deg(jrow)%size1 - ! - do ideg = 1,isize - ! - deg_i(:) = PT%Index_deg(irow)%icoeffs(:,ideg) - ! - do jdeg = 1,jsize - ! - deg_j(:) = PT%Index_deg(jrow)%icoeffs(:,jdeg) - ! - !iroot = contr(iclass)%iroot(cnu_i(0),deg_i(0)) - !jroot = contr(iclass)%iroot(cnu_j(0),deg_j(0)) - ! - icontr = PT%icase2icontr(irow,ideg) - jcontr = PT%icase2icontr(jrow,jdeg) - !k_i = PT%rot_index(cnu_i(0),deg_i(0))%k - !k_j = PT%rot_index(cnu_j(0),deg_j(0))%k - !tau_i = PT%rot_index(cnu_i(0),deg_i(0))%tau - !tau_j = PT%rot_index(cnu_j(0),deg_j(0))%tau - ! - ! Matrix elements - ! - hcontr(ideg,jdeg) = func(icontr,jcontr,jrot,cnu_i(0),cnu_j(0),deg_i(0),deg_j(0)) - ! - enddo - ! - enddo - ! - do isym = 1,sym%Nrepresen - ! - iterm = ijterm(irow,isym) - ! - do jsym = 1,isym - ! - jterm = ijterm(jrow,jsym) - ! - do ielem = 1,PT%irr(isym)%N(irow) - ! - vec_i(1:isize) = PT%irr(isym)%repres(iterm+ielem,1,1:isize) - ! - do jelem = 1,PT%irr(jsym)%N(jrow) - ! - vec_j(1:jsize) = PT%irr(jsym)%repres(jterm+jelem,1,1:jsize) - ! - vec_j(1:isize) = matmul(hcontr(1:isize,1:jsize),vec_j(1:jsize)) - ! - mat_elem = dot_product(vec_i(1:isize),vec_j(1:isize)) - ! - if (isym==jsym) then - ! - iL = ielem - iR = jterm+jelem - ! - if (iterm+ielem(10.0_rk)**(-(rk-3))) then - ! - ! We print out non-zero mat. elements between different symmetries, which have to be zero. - ! - if (job%verbose>=6) & - write(out,"('<',a4,2i6,'|H|',a4,2i6,'> = ',g18.10)") & - sym%label(isym),irow,iterm+ielem,sym%label(jsym),jrow,jterm+jelem,mat_elem - ! - ! if this error is too big - we stop - ! - if(abs(mat_elem)>1.0_rk) then - !write(out,"(/'A non-diagonal mat. element between different symmetries:')") - write(out,"(/'<',a4,3i6,'|H|',a4,3i6,'> = ',g18.10,a)") & - sym%label(isym),irow,ielem,iterm+ielem,sym%label(jsym),jrow,jelem,jterm+jelem,mat_elem,& - ' Non-diagonal element (euler) between different symmetries is too large!' - stop 'non-zero element between two symmetries - symm_mat_element_vector' - endif - ! - endif - endif - ! - enddo - enddo - enddo - ! - enddo - ! - enddo - ! - ! - !call TimerStop('Symmetrized Hamiltonian - one column') - ! - ! - end subroutine symm_mat_element_vector + !integer(ik),intent(in) :: jrot,irow,ijterm(:,:) + !real(rk),external :: func + !real(rk),intent(out) :: mat_t(:,:,:) + !logical,optional,intent(in) :: no_diagonalization + !! + !integer(ik) :: cnu_i(0:PT%Nclasses),cnu_j(0:PT%Nclasses) + !integer(ik) :: deg_i(0:PT%Nclasses),deg_j(0:PT%Nclasses) + !real(rk) :: mat_elem + !integer(ik) :: isize,jsize,ielem,jelem + !integer(ik) :: jrow,ideg,jdeg,isym,jsym,iL,iR,iterm,jterm,icontr,jcontr + !real(rk) :: vec_i(PT%max_deg_size),vec_j(PT%max_deg_size) + + !real(rk), dimension(:,:,:), allocatable :: hcontr + !allocate(hcontr(PT%max_deg_size,PT%max_deg_size,irow)) + !! + !mat_t = 0 + !! + !cnu_i(:) = PT%contractive_space(:,irow) + !! + !isize = PT%Index_deg(irow)%size1 + !! + !do jrow = 1,irow + !! + !if ( present(no_diagonalization).and.no_diagonalization.and.jrow/=irow ) cycle + !! + !cnu_j(:) = PT%contractive_space(:,jrow) + !! + !jsize = PT%Index_deg(jrow)%size1 + !! + !do ideg = 1,isize + !! + !deg_i(:) = PT%Index_deg(irow)%icoeffs(:,ideg) + !! + !do jdeg = 1,jsize + !! + !deg_j(:) = PT%Index_deg(jrow)%icoeffs(:,jdeg) + !! + !icontr = PT%icase2icontr(irow,ideg) + !jcontr = PT%icase2icontr(jrow,jdeg) + !! + !! Matrix elements + !! + !hcontr(ideg,jdeg,jrow) = func(icontr,jcontr,jrot,cnu_i(0),cnu_j(0),deg_i(0),deg_j(0)) + !! + !enddo + !! + !enddo + !! + !end do + + !do jrow=1,irow + !jsize = PT%Index_deg(jrow)%size1 + !do isym = 1,sym%Nrepresen + !! + !iterm = ijterm(irow,isym) + !! + !do jsym = 1,isym + !! + !jterm = ijterm(jrow,jsym) + !! + !do ielem = 1,PT%irr(isym)%N(irow) + !! + !vec_i(1:isize) = PT%irr(isym)%repres(iterm+ielem,1,1:isize) + !! + !do jelem = 1,PT%irr(jsym)%N(jrow) + !! + !vec_j(1:jsize) = PT%irr(jsym)%repres(jterm+jelem,1,1:jsize) + !! + !vec_j(1:isize) = matmul(hcontr(1:isize,1:jsize, jrow),vec_j(1:jsize)) + !! + !mat_elem = dot_product(vec_i(1:isize),vec_j(1:isize)) + !! + !if (isym==jsym) then + !! + !iL = ielem + !iR = jterm+jelem + !! + !if (iterm+ielem(10.0_rk)**(-(rk-3))) then + !! + !! We print out non-zero mat. elements between different symmetries, which have to be zero. + !! + !if (job%verbose>=6) & + !write(out,"('<',a4,2i6,'|H|',a4,2i6,'> = ',g18.10)") & + !sym%label(isym),irow,iterm+ielem,sym%label(jsym),jrow,jterm+jelem,mat_elem + !! + !! if this error is too big - we stop + !! + !if(abs(mat_elem)>1.0_rk) then + !!write(out,"(/'A non-diagonal mat. element between different symmetries:')") + !write(out,"(/'<',a4,3i6,'|H|',a4,3i6,'> = ',g18.10,a)") & + !sym%label(isym),irow,ielem,iterm+ielem,sym%label(jsym),jrow,jelem,jterm+jelem,mat_elem,& + !' Non-diagonal element (euler) between different symmetries is too large!' + !stop 'non-zero element between two symmetries - symm_mat_element_vector' + !endif + !! + !endif + !endif + !! + !enddo + !enddo + !enddo + !! + !enddo + !! + !enddo + !! + !! + !!call TimerStop('Symmetrized Hamiltonian - one column') + !! + !! + !end subroutine symm_mat_element_vector ! ! In this version of the routine we construct the Hamiltonian matrix in symm. adapted representaion From 313d0988256dc50dffc762c80570455870e557af Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Mon, 9 Aug 2021 10:12:24 +0100 Subject: [PATCH 74/79] Assume openmpi if using gfortran and add more debug compilation flags --- makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/makefile b/makefile index 6a4c350..135ff9f 100644 --- a/makefile +++ b/makefile @@ -46,7 +46,7 @@ else ifeq ($(strip $(COMPILER)),gfortran) endif ifeq ($(strip $(MODE)),debug) - FFLAGS += -O0 -g -Wall -Wextra -fbacktrace + FFLAGS += -O0 -g -Wall -Wextra -fbacktrace -finit-local-zero -ffpe-trap=invalid,zero,overflow -fbounds-check -fcheck=all else ifeq ($(strip $(MODE)),ci) FFLAGS += -O2 -g else @@ -55,7 +55,8 @@ else ifeq ($(strip $(COMPILER)),gfortran) LAPACK = -L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl ifneq ($(strip $(USE_MPI)),0) - LAPACK += -lmkl_blacs_intelmpi_lp64 -lmkl_scalapack_lp64 + # Assume we're using openmpi with gfortran + LAPACK += -lmkl_blacs_openmpi_lp64 -lmkl_scalapack_lp64 endif else $(error Compiler option "$(COMPILER)" not defined.) From c9639d6b6eed4a5f1d9dacdbb578c4a04a244242 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Mon, 9 Aug 2021 10:12:41 +0100 Subject: [PATCH 75/79] Fix bash errors --- test/scripts/H2CO/compare_results.sh | 2 +- test/scripts/H2CO/run_benchmark.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index 78011b1..8688c11 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,7 +11,7 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -if [ ${USE_MPI} != 1 ]; then +if [[ ${USE_MPI} -eq 1 ]]; then python compare_results.py --kind intensity --precision 5e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out fi diff --git a/test/scripts/H2CO/run_benchmark.sh b/test/scripts/H2CO/run_benchmark.sh index c3176d7..ad2b899 100755 --- a/test/scripts/H2CO/run_benchmark.sh +++ b/test/scripts/H2CO/run_benchmark.sh @@ -14,7 +14,7 @@ export OMP_NUM_THREADS=$nproc # Ensure stacksize unlimited (for fortran) ulimit -d unlimited -if [ ${USE_MPI} = 1 ]; then +if [[ ${USE_MPI} -eq 1 ]]; then LAUNCH="time mpirun -ppn 1 -np $nproc" ./set_io_format.sh enable echo "Will run with MPI" @@ -28,7 +28,7 @@ echo "Current directory: `pwd`" echo "Using ${nproc} process(es)" files_to_check=(file{1..12}) -if [ ${USE_MPI} != 1 ]; then +if [[ ${USE_MPI} -ne 1 ]]; then # The intensity file does not work with MPI at the moment files_to_check+=(file_intensity) fi From 846b27801ee39cc86ac23230062c209ccf76a60a Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Tue, 10 Aug 2021 18:52:48 +0100 Subject: [PATCH 76/79] Fix unassociated array access --- fields.f90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fields.f90 b/fields.f90 index f083d33..5476db0 100644 --- a/fields.f90 +++ b/fields.f90 @@ -14555,7 +14555,9 @@ subroutine KineticSave_ASCII ! fl => trove%g_cor(k1,k2) ! - call write_ascii(k1,k2,fl%Ncoeff,fl%Npoints,chkptIO_kin,fl%ifromsparse,fl%field) + if (associated(fl)) then + call write_ascii(k1,k2,fl%Ncoeff,fl%Npoints,chkptIO_kin,fl%ifromsparse,fl%field) + endif ! enddo enddo From 2e949c911bf043f4f94684284a7b5e929094b8e6 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 11 Aug 2021 11:26:09 +0100 Subject: [PATCH 77/79] Disable some runtime checks in debug compiler flags (just don't have time to deal with these errors!) --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 135ff9f..a6d8e43 100644 --- a/makefile +++ b/makefile @@ -46,7 +46,7 @@ else ifeq ($(strip $(COMPILER)),gfortran) endif ifeq ($(strip $(MODE)),debug) - FFLAGS += -O0 -g -Wall -Wextra -fbacktrace -finit-local-zero -ffpe-trap=invalid,zero,overflow -fbounds-check -fcheck=all + FFLAGS += -O0 -g -Wall -Wextra -fbacktrace -finit-local-zero -ffpe-trap=invalid,zero,overflow # -fbounds-check -fcheck=all else ifeq ($(strip $(MODE)),ci) FFLAGS += -O2 -g else From 3a2b77c2c238e0fc10e15f898cb1cdaf07279fae Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 11 Aug 2021 11:27:14 +0100 Subject: [PATCH 78/79] Fix incorrect filename and fix spacing in error message --- perturbation.f90 | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/perturbation.f90 b/perturbation.f90 index 127dc2e..3d98e8c 100644 --- a/perturbation.f90 +++ b/perturbation.f90 @@ -8513,7 +8513,7 @@ subroutine open_chkptfile_mpi(fileh, filename, mode) call MPI_File_open(mpi_comm_world, filename, amode, mpi_info_null, fileh, ierr) if (ierr.gt.0) then - if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file.", filename + if (mpi_rank .eq. 0) write(*,*) "Error opening MPI-IO-formatted Vib. kinetic checkpoint file. ", filename stop "MPI_PTrestore_rot_kinetic_matrix_elements - Error opening MATELEM MPI-IO input file" endif @@ -8600,10 +8600,7 @@ subroutine PTrestore_rot_kinetic_matrix_elements_mpi(jrot, task, fileh, dimen, & !TODO - MPI-compatible IOStart !call IOStart(trim(job_id),fileh) - !TODO set filename dynamically - filename = trim(job%matelem_suffix)//'.chk' - - call open_chkptfile_mpi(fileh, filename, 'read') + call open_chkptfile_mpi(fileh, job%kinetmat_file, 'read') !Collective read woo call MPI_File_read_all(fileh, readbuf, 7, mpi_character, mpi_status_ignore, ierr) From a754bd9f842f41ff65d97ebfebd76ecf439815b1 Mon Sep 17 00:00:00 2001 From: Jamie Quinn Date: Wed, 11 Aug 2021 11:39:00 +0100 Subject: [PATCH 79/79] Only compare intensity output if MPI disabled --- test/scripts/H2CO/compare_results.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/scripts/H2CO/compare_results.sh b/test/scripts/H2CO/compare_results.sh index 8688c11..1c1ffdc 100755 --- a/test/scripts/H2CO/compare_results.sh +++ b/test/scripts/H2CO/compare_results.sh @@ -11,7 +11,7 @@ quantum_files="eigen_descr0_1.chk eigen_descr0_2.chk eigen_descr0_3.chk eigen_de python compare_results.py --kind quantum --folder1 "$folder1" --folder2 "$folder2" $quantum_files -if [[ ${USE_MPI} -eq 1 ]]; then +if [[ ${USE_MPI} -ne 1 ]]; then python compare_results.py --kind intensity --precision 5e-6 --folder1 "$folder1" --folder2 "$folder2" file_intensity.out fi