!-------------------------------------------------------------------------------
subroutine mpstart(mpicomm,mpirank,mpisize,iret)
!$$$  Subprogram documentation block
!
! Subprogram:    mpstart     Initialize a distributed program
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram initializes a distributed program.
!   It returns a communicator and the rank and size of the distribution.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpstart(mpicomm,mpirank,mpisize,iret)
!   Output argument list:
!     mpicomm  integer(kint_mpi) MPI communicator
!     mpirank  integer(kint_mpi) rank of the process (from mpi_comm_rank)
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     iret     integer(kint_mpi) return code
!
! Subprograms called:
!   mpi_init       MPI initialization
!   mpi_comm_rank  get MPI rank
!   mpi_comm_size  get MPI size
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(out):: mpicomm,mpirank,mpisize,iret
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  call mpi_init(iret)
  if(iret.ne.0) return
  mpicomm=MPI_COMM_WORLD
  call mpi_comm_rank(mpicomm,mpirank,iret)
  call mpi_comm_size(mpicomm,mpisize,iret)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpabort(iret)
!$$$  Subprogram documentation block
!
! Subprogram:    mpabort     Abort a distributed program
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram aborts a distributed program with a return code.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpabort(iret)
!   Input argument list:
!     iret     integer(kint_mpi) return code
!
! Subprograms called:
!   mpi_abort  MPI abort
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: iret
  integer(kint_mpi) ierr
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  call mpi_abort(MPI_COMM_WORLD,iret,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpstop(iret)
!$$$  Subprogram documentation block
!
! Subprogram:    mpstop      Finalize a distributed program
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram finalizes a distributed program.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpstop(iret)
!   Output argument list:
!     iret     integer(kint_mpi) return code
!
! Subprograms called:
!   mpi_finalize  MPI finalization
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(out):: iret
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  call mpi_finalize(iret)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
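!-------------------------------------------------------------------------------
! Example (illustrative sketch).  A minimal program life cycle built on the
! wrappers above: mpstart at startup, mpabort on a fatal error, and mpstop at
! shutdown.  The program name is arbitrary.
!!! program lifecycle
!!!   implicit none
!!!   integer mpicomm,mpirank,mpisize,iret
!!!   call mpstart(mpicomm,mpirank,mpisize,iret)        ! initialize mpi
!!!   if(iret.ne.0) call mpabort(iret)                  ! abort on failure
!!!   print '("rank ",i4," of ",i4)',mpirank,mpisize    ! distributed work here
!!!   call mpstop(iret)                                 ! finalize mpi
!!! end program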
!-------------------------------------------------------------------------------
subroutine mpnexts(mpicomm,mpirank,mpisize,iflag,na,a,iret)
!$$$  Subprogram documentation block
!
! Subprogram:    mpnexts     Send integer data to the next process
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram sends integer data to the next process.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpnexts(mpicomm,mpirank,mpisize,iflag,na,a,iret)
!   Input argument list:
!     mpicomm  integer(kint_mpi) MPI communicator
!     mpirank  integer(kint_mpi) rank of the process (from mpi_comm_rank)
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     iflag    integer(kint_mpi) cyclic flag (=1 if last sends to the first)
!     na       integer(kint_mpi) number of words to send
!     a        integer(kint_mpi) (na) array of data to send
!   Output argument list:
!     iret     integer(kint_mpi) return code
!
! Subprograms called:
!   mpi_send  MPI send
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpirank,mpisize,iflag,na
  integer(kint_mpi),intent(in):: a(na)
  integer(kint_mpi),intent(out):: iret
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(iflag.eq.1.or.mpirank.lt.mpisize-1)&
    call mpi_send(a,na,MPI_INTEGER,mod(mpirank+1,mpisize),0,mpicomm,iret)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpnextr(mpicomm,mpirank,mpisize,iflag,na,a,iret)
!$$$  Subprogram documentation block
!
! Subprogram:    mpnextr     Receive integer data from the previous process
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram receives integer data from the previous process.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpnextr(mpicomm,mpirank,mpisize,iflag,na,a,iret)
!   Input argument list:
!     mpicomm  integer(kint_mpi) MPI communicator
!     mpirank  integer(kint_mpi) rank of the process (from mpi_comm_rank)
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     iflag    integer(kint_mpi) cyclic flag (=1 if first receives from last)
!     na       integer(kint_mpi) number of words to receive
!   Output argument list:
!     a        integer(kint_mpi) (na) array to receive data
!     iret     integer(kint_mpi) return code
!
! Subprograms called:
!   mpi_recv  MPI receive
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpirank,mpisize,iflag,na
  integer(kint_mpi),intent(out):: a(na),iret
  integer(kint_mpi) status(MPI_STATUS_SIZE)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(iflag.eq.1.or.mpirank.gt.0)&
    call mpi_recv(a,na,MPI_INTEGER,MPI_ANY_SOURCE,MPI_ANY_TAG,mpicomm,status,iret)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
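!-------------------------------------------------------------------------------
! Example (illustrative sketch).  Pass a small integer array one step down a
! non-cyclic pipeline (iflag=0) with mpnexts and mpnextr, so that every rank
! except rank 0 receives the rank number of its predecessor.
!!! program pipeline
!!!   implicit none
!!!   integer mpicomm,mpirank,mpisize,iret
!!!   integer a(1)
!!!   call mpstart(mpicomm,mpirank,mpisize,iret)        ! initialize mpi
!!!   a(1)=mpirank                                      ! data to pass along
!!!   call mpnexts(mpicomm,mpirank,mpisize,0,1,a,iret)  ! send to next rank
!!!   call mpnextr(mpicomm,mpirank,mpisize,0,1,a,iret)  ! receive from previous
!!!   if(mpirank.gt.0) print *,'rank',mpirank,'received',a(1)
!!!   call mpstop(iret)                                 ! finalize mpi
!!! end program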
!-------------------------------------------------------------------------------
subroutine mptgen(mpirank,mpisize,nd,jt1,jt2,j1,j2,jx,jm,jn)
!$$$  Subprogram documentation block
!
! Subprogram:    mptgen      Generate grid decomposition dimensions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram decomposes total dimensions of a problem
!   into smaller domains to be managed on a distributed memory system.
!   The last dimension given is decomposed first.  If more decompositions
!   are possible, the next to last dimension is decomposed next, and so on.
!   The transpositions between decompositions should be done by mptran*.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptgen(mpirank,mpisize,nd,jt1,jt2,j1,j2,jx,jm,jn)
!   Input argument list:
!     mpirank  integer(kint_mpi) rank of the process (from mpi_comm_rank)
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     nd       integer(kint_mpi) number of dimensions to decompose
!     jt1      integer(kint_mpi) (nd) lower bounds of total dimensions
!     jt2      integer(kint_mpi) (nd) upper bounds of total dimensions
!   Output argument list:
!     j1       integer(kint_mpi) (nd) lower bounds of local decompositions
!     j2       integer(kint_mpi) (nd) upper bounds of local decompositions
!     jx       integer(kint_mpi) (nd) local size of decompositions
!     jm       integer(kint_mpi) (nd) maximum size of decompositions
!     jn       integer(kint_mpi) (nd) number of decompositions
!
! Attributes:
!   Language: Fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  integer(kint_mpi),intent(in):: mpirank,mpisize,nd,jt1(nd),jt2(nd)
  integer(kint_mpi),intent(out):: j1(nd),j2(nd),jx(nd),jm(nd),jn(nd)
  integer msize,mrank,msn,mrn,n
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  msize=mpisize
  mrank=mpirank
  do n=nd,1,-1
    if(jt2(n).ge.jt1(n)) then
      jm(n)=(jt2(n)-jt1(n))/msize+1
      msn=max(msize/(jt2(n)-jt1(n)+1),1)
      if(n.eq.1) msn=1
      jn(n)=msize/msn
      mrn=mrank/msn
      j1(n)=min(jt1(n)+jm(n)*mrn,jt2(n)+1)
      j2(n)=min(jt1(n)+jm(n)*mrn+jm(n)-1,jt2(n))
      jx(n)=j2(n)-j1(n)+1
      msize=msn
      mrank=mod(mrank,msn)
    else
      jm(n)=0
      jn(n)=1
      j1(n)=jt1(n)
      j2(n)=jt2(n)
      jx(n)=0
    endif
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpfillc1(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mpfillc1    Gather grid decomposition
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram gathers an array of data from a
!   grid decomposition to its full extent by using message passing.
!   The grid decomposition should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpfillc1(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     im       integer(kint_mpi) undecomposed range
!     ida      integer(kint_mpi) undecomposed input dimension
!     idb      integer(kint_mpi) undecomposed output dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     a        character(1) (ida,kma) input array
!   Output argument list:
!     b        character(1) (idb,kmb) output array
!
! Subprograms called:
!   mpi_allgather  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable gather functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_allgather
!   in any of the following cases:
!     (a) The undecomposed range is less than the respective dimension
!         (either im.lt.ida or im.lt.idb)
!     (b) The decomposed range is ever less than the decomposition size
!         (kma.lt.km for any process)
!     (c) The output grid range is not the full extent
!         (kmb.lt.mpisize*km)
!   If none of these conditions apply, mpi_allgather could be used directly
!   rather than this subprogram and would be more efficient.
!
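!   Example (illustrative sketch).  Gather a decomposed character(1) field to
!   its full extent on every process, using a decomposition from mptgen.  The
!   sizes it and kt are arbitrary.
!!! include 'mpif.h'                                       ! use mpi
!!! parameter(it=8,kt=100)                                 ! set problem size
!!! character(1),allocatable:: a(:,:),b(:,:)               ! declare arrays
!!! call mpi_init(ierr)                                    ! initialize mpi
!!! call mpi_comm_rank(MPI_COMM_WORLD,mpirank,ierr)        ! get mpi rank
!!! call mpi_comm_size(MPI_COMM_WORLD,mpisize,ierr)        ! get mpi size
!!! call mptgen(mpirank,mpisize,1,1,kt,k1,k2,kx,km,kn)     ! decompose field
!!! allocate(a(it,k1:k2),b(it,kt))                         ! allocate arrays
!!! a=achar(65+mod(mpirank,26))                            ! initialize input
!!! call mpfillc1(MPI_COMM_WORLD,mpisize,it,it,it,km,k2-k1+1,kt,a,b)
!!! if(mpirank.eq.0) print *,b(1,1),b(1,kt)                ! print some values
!!! call mpi_finalize(ierr)                                ! finalize mpi
!!! end
!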
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: im,ida,idb
  integer(kint_mpi),intent(in):: km,kma,kmb
  character(1),dimension(ida,kma),intent(in):: a
  character(1),dimension(idb,kmb),intent(out):: b
  character(1),dimension(im,km):: ta
  character(1),dimension(im,km,mpisize):: tb
  integer(kint_mpi)::i,k,l,ierr,kb
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally gather input array
!$omp parallel do
  do k=1,kma
    do i=1,im
      ta(i,k)=a(i,k)
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally gather data
  call mpi_allgather(ta,im*km,MPI_CHARACTER,tb,im*km,MPI_CHARACTER,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally gather output array
!$omp parallel do private(kb)
  do l=1,mpisize
    do k=1,km
      kb=k+km*(l-1)
      if(kb.le.kmb) then
        do i=1,im
          b(i,kb)=tb(i,k,l)
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpfilli4(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mpfilli4    Gather grid decomposition
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram gathers an array of data from a
!   grid decomposition to its full extent by using message passing.
!   The grid decomposition should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpfilli4(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     im       integer(kint_mpi) undecomposed range
!     ida      integer(kint_mpi) undecomposed input dimension
!     idb      integer(kint_mpi) undecomposed output dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     a        integer(kint_mpi) (ida,kma) input array
!   Output argument list:
!     b        integer(kint_mpi) (idb,kmb) output array
!
! Subprograms called:
!   mpi_allgather  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable gather functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_allgather
!   in any of the following cases:
!     (a) The undecomposed range is less than the respective dimension
!         (either im.lt.ida or im.lt.idb)
!     (b) The decomposed range is ever less than the decomposition size
!         (kma.lt.km for any process)
!     (c) The output grid range is not the full extent
!         (kmb.lt.mpisize*km)
!   If none of these conditions apply, mpi_allgather could be used directly
!   rather than this subprogram and would be more efficient.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: im,ida,idb
  integer(kint_mpi),intent(in):: km,kma,kmb
  integer(kint_mpi),dimension(ida,kma),intent(in):: a
  integer(kint_mpi),dimension(idb,kmb),intent(out):: b
  integer(kint_mpi),dimension(im,km):: ta
  integer(kint_mpi),dimension(im,km,mpisize):: tb
  integer(kint_mpi)::i,k,l,ierr,kb
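!  The gather proceeds in three steps: (1) copy the local slice a(1:im,1:kma)
!  into the contiguous buffer ta, (2) exchange equal-sized blocks among all
!  processes with mpi_allgather, and (3) unpack the received blocks tb into b,
!  discarding any block positions beyond the output range kmb.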
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally gather input array
!$omp parallel do
  do k=1,kma
    do i=1,im
      ta(i,k)=a(i,k)
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally gather data
  call mpi_allgather(ta,im*km,MPI_INTEGER4,tb,im*km,MPI_INTEGER4,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally gather output array
!$omp parallel do private(kb)
  do l=1,mpisize
    do k=1,km
      kb=k+km*(l-1)
      if(kb.le.kmb) then
        do i=1,im
          b(i,kb)=tb(i,k,l)
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mpfillr4(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mpfillr4    Gather grid decomposition
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram gathers an array of data from a
!   grid decomposition to its full extent by using message passing.
!   The grid decomposition should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mpfillr4(mpicomm,mpisize,im,ida,idb,km,kma,kmb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     im       integer(kint_mpi) undecomposed range
!     ida      integer(kint_mpi) undecomposed input dimension
!     idb      integer(kint_mpi) undecomposed output dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     a        real(4) (ida,kma) input array
!   Output argument list:
!     b        real(4) (idb,kmb) output array
!
! Subprograms called:
!   mpi_allgather  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable gather functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_allgather
!   in any of the following cases:
!     (a) The undecomposed range is less than the respective dimension
!         (either im.lt.ida or im.lt.idb)
!     (b) The decomposed range is ever less than the decomposition size
!         (kma.lt.km for any process)
!     (c) The output grid range is not the full extent
!         (kmb.lt.mpisize*km)
!   If none of these conditions apply, mpi_allgather could be used directly
!   rather than this subprogram and would be more efficient.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: im,ida,idb
  integer(kint_mpi),intent(in):: km,kma,kmb
  real(4),dimension(ida,kma),intent(in):: a
  real(4),dimension(idb,kmb),intent(out):: b
  real(4),dimension(im,km):: ta
  real(4),dimension(im,km,mpisize):: tb
  integer(kint_mpi)::i,k,l,ierr,kb
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally gather input array
!$omp parallel do
  do k=1,kma
    do i=1,im
      ta(i,k)=a(i,k)
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally gather data
  call mpi_allgather(ta,im*km,MPI_REAL4,tb,im*km,MPI_REAL4,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
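!  Note: for a decomposition generated by mptgen with kmb set to the full
!  gathered range, a process whose local range kma is less than km contributes
!  uninitialized padding at the end of its block; those positions map to
!  kb.gt.kmb and are discarded by the test below.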
!  internally gather output array
!$omp parallel do private(kb)
  do l=1,mpisize
    do k=1,km
      kb=k+km*(l-1)
      if(kb.le.kmb) then
        do i=1,im
          b(i,kb)=tb(i,k,l)
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mptran1l1(mpicomm,mpisize,jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mptran1l1   Transpose grid decompositions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram transposes an array of data from one
!   grid decomposition to another by using message passing.
!   The grid decompositions should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptran1l1(mpicomm,mpisize,jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     jm       integer(kint_mpi) output grid decomposition size
!     jma      integer(kint_mpi) input grid undecomposed range
!     jmb      integer(kint_mpi) output grid decomposed range
!     jda      integer(kint_mpi) input grid undecomposed dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     kdb      integer(kint_mpi) output grid undecomposed dimension
!     a        logical*1 (jda,kma) input array
!   Output argument list:
!     b        logical*1 (kdb,jmb) output array
!
! Subprograms called:
!   mpi_alltoall  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable transpose functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_alltoall
!   in any of the following cases:
!     (a) The decomposition size is greater than one
!         (either km.gt.1 or jm.gt.1)
!     (b) The decomposed range is ever zero
!         (either kma.eq.0 or jmb.eq.0 for any process)
!     (c) The output grid range is not the full extent
!         (either kmb.lt.mpisize or kmb.lt.kdb or jma.lt.mpisize or jma.lt.jda)
!   If none of these conditions apply, mpi_alltoall could be used directly
!   rather than this subprogram and would be more efficient.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: jm,jma,jmb,jda
  integer(kint_mpi),intent(in):: km,kma,kmb,kdb
  logical*1,dimension(jda,kma),intent(in):: a
  logical*1,dimension(kdb,jmb),intent(out):: b
  logical*1,dimension(jm,km,mpisize):: ta,tb
  integer(kint_mpi)::j,k,l,ierr
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  check single node case
  if(mpisize.eq.1) then
    do k=1,min(km,kma,kmb)
      do j=1,min(jm,jma,jmb)
        b(k,j)=a(j,k)
      enddo
    enddo
    return
  endif
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose input array
!$omp parallel do
  do l=1,mpisize
    do k=1,kma
      do j=1,min(jm,jma-jm*(l-1))
        ta(j,k,l)=a(j+jm*(l-1),k)
      enddo
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally transpose data
  call mpi_alltoall(ta,jm*km,MPI_LOGICAL1,tb,jm*km,MPI_LOGICAL1,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
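!  After mpi_alltoall, tb(:,:,l) holds the block packed by process l-1: its
!  local k-columns restricted to this process's j-range.  The loop below
!  places those columns at output positions k+km*(l-1), clipping at kmb.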
!  internally transpose output array
!$omp parallel do
  do l=1,mpisize
    do k=1,min(km,kmb-km*(l-1))
      do j=1,jmb
        b(k+km*(l-1),j)=tb(j,k,l)
      enddo
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mptran1r4(mpicomm,mpisize,jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mptran1r4   Transpose grid decompositions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram transposes an array of data from one
!   grid decomposition to another by using message passing.
!   The grid decompositions should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptran1r4(mpicomm,mpisize,jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     jm       integer(kint_mpi) output grid decomposition size
!     jma      integer(kint_mpi) input grid undecomposed range
!     jmb      integer(kint_mpi) output grid decomposed range
!     jda      integer(kint_mpi) input grid undecomposed dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     kdb      integer(kint_mpi) output grid undecomposed dimension
!     a        real(4) (jda,kma) input array
!   Output argument list:
!     b        real(4) (kdb,jmb) output array
!
! Subprograms called:
!   mpi_alltoall  mpi exchange of data between every process pair
!   sgetmo        essl transpose
!
! Remarks:
!   While this routine serves a wide variety of scalable transpose functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_alltoall
!   in any of the following cases:
!     (a) The decomposition size is greater than one
!         (either km.gt.1 or jm.gt.1)
!     (b) The decomposed range is ever zero
!         (either kma.eq.0 or jmb.eq.0 for any process)
!     (c) The output grid range is not the full extent
!         (either kmb.lt.mpisize or kmb.lt.kdb or jma.lt.mpisize or jma.lt.jda)
!   If none of these conditions apply, mpi_alltoall could be used directly
!   rather than this subprogram and would be more efficient.
!
!   Example 1. Transpose a 1000 x 10000 matrix.
!
!!! include 'mpif.h'                                       ! use mpi
!!! parameter(jt=1000,kt=10000)                            ! set problem size
!!! real,allocatable:: a(:,:),b(:,:)                       ! declare arrays
!!! call mpi_init(ierr)                                    ! initialize mpi
!!! call mpi_comm_rank(MPI_COMM_WORLD,mpirank,ierr)        ! get mpi rank
!!! call mpi_comm_size(MPI_COMM_WORLD,mpisize,ierr)        ! get mpi size
!!! call mptgen(mpirank,mpisize,1,1,jt,j1,j2,jx,jm,jn)     ! decompose output
!!! call mptgen(mpirank,mpisize,1,1,kt,k1,k2,kx,km,kn)     ! decompose input
!!! allocate(a(jt,k1:k2),b(kt,j1:j2))                      ! allocate arrays
!!! a=reshape((/((j+k,j=1,jt),k=k1,k2)/),(/jt,k2-k1+1/))   ! initialize input
!!! call mptran1r4(MPI_COMM_WORLD,mpisize,                 ! transpose arrays
!!! & jm,jt,j2-j1+1,jt,km,k2-k1+1,kt,kt,a,b)
!!! print '(2i8,f16.1)',((k,j,b(k,j),k=2000,kt,2000),      ! print some values
!!! & j=((j1-1)/200+1)*200,j2,200)
!!! call mpi_finalize(ierr)                                ! finalize mpi
!!! end
!
!   This 1000x10000 transpose took 0.57 seconds on 2 winterhawk processors;
!   it took 0.31 seconds on 4 processors and 0.18 seconds on 8 processors.
!   A 10000x10000 transpose took 0.84 seconds on 16 winterhawk processors;
!   it took 0.77 seconds on 20 processors and 0.40 seconds on 40 processors.
!
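!   The routine sgetmo listed above under Subprograms called is the ESSL
!   out-of-place general matrix transpose; as used here, sgetmo(a,lda,m,n,b,ldb)
!   stores the transpose of the leading m by n block of a (leading dimension
!   lda) into b (leading dimension ldb).  On systems without ESSL, a drop-in
!   replacement along these lines should suffice:
!!! subroutine sgetmo(a,lda,m,n,b,ldb)
!!!   integer lda,m,n,ldb
!!!   real(4) a(lda,n),b(ldb,m)
!!!   do i=1,n
!!!     do j=1,m
!!!       b(i,j)=a(j,i)
!!!     enddo
!!!   enddo
!!! end subroutine
!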
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: jm,jma,jmb,jda
  integer(kint_mpi),intent(in):: km,kma,kmb,kdb
  real(4),dimension(jda,kma),intent(in):: a
  real(4),dimension(kdb,jmb),intent(out):: b
  real(4),dimension(jm,km,mpisize):: ta,tb
  integer(kint_mpi)::j,k,l,ierr
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  check single node case
  if(mpisize.eq.1) then
    if(min(km,kma,kmb,jm,jma,jmb).gt.0)&
      call sgetmo(a,jda,min(jm,jma,jmb),min(km,kma,kmb),b,kdb)
    return
  endif
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose input array
!$omp parallel do
  do l=1,mpisize
    do k=1,kma
      do j=1,min(jm,jma-jm*(l-1))
        ta(j,k,l)=a(j+jm*(l-1),k)
      enddo
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally transpose data
  call mpi_alltoall(ta,jm*km,MPI_REAL4,tb,jm*km,MPI_REAL4,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose output array
  if(min(km,kmb,jmb).gt.0)&
    call sgetmo(tb,jm,jmb,kmb,b,kdb)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mptranl1(mpicomm,mpisize,im,ida,idb,&
                    jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mptranl1    Transpose grid decompositions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram transposes an array of data from one
!   grid decomposition to another by using message passing.
!   The grid decompositions should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptranl1(mpicomm,mpisize,im,ida,idb,&
!                         jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     im       integer(kint_mpi) undecomposed range
!     ida      integer(kint_mpi) undecomposed input dimension
!     idb      integer(kint_mpi) undecomposed output dimension
!     jm       integer(kint_mpi) output grid decomposition size
!     jma      integer(kint_mpi) input grid undecomposed range
!     jmb      integer(kint_mpi) output grid decomposed range
!     jda      integer(kint_mpi) input grid undecomposed dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     kdb      integer(kint_mpi) output grid undecomposed dimension
!     a        logical*1 (ida,jda,kma) input array
!   Output argument list:
!     b        logical*1 (idb,kdb,jmb) output array
!
! Subprograms called:
!   mpi_alltoall  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable transpose functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_alltoall
!   in any of the following cases:
!     (a) The undecomposed range is less than the respective dimension
!         (either im.lt.ida or im.lt.idb)
!     (b) The decomposition size is greater than one
!         (either km.gt.1 or jm.gt.1)
!     (c) The decomposed range is ever zero
!         (either kma.eq.0 or jmb.eq.0 for any process)
!     (d) The output grid range is not the full extent
!         (either kmb.lt.mpisize or kmb.lt.kdb or jma.lt.mpisize or jma.lt.jda)
!   If none of these conditions apply, mpi_alltoall could be used directly
!   rather than this subprogram and would be more efficient.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: im,ida,idb
  integer(kint_mpi),intent(in):: jm,jma,jmb,jda
  integer(kint_mpi),intent(in):: km,kma,kmb,kdb
  logical*1,dimension(ida,jda,kma),intent(in):: a
  logical*1,dimension(idb,kdb,jmb),intent(out):: b
  logical*1,dimension(im,jm,km,mpisize):: ta,tb
  integer(kint_mpi)::i,j,k,l,ierr,ja,kb
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose input array
!$omp parallel do private(ja)
  do l=1,mpisize
    do k=1,kma
      do j=1,jm
        ja=j+jm*(l-1)
        if(ja.le.jma) then
          do i=1,im
            ta(i,j,k,l)=a(i,ja,k)
          enddo
        endif
      enddo
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally transpose data
  call mpi_alltoall(ta,im*jm*km,MPI_LOGICAL1,tb,im*jm*km,MPI_LOGICAL1,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose output array
!$omp parallel do private(kb)
  do l=1,mpisize
    do k=1,km
      kb=k+km*(l-1)
      if(kb.le.kmb) then
        do j=1,jmb
          do i=1,im
            b(i,kb,j)=tb(i,j,k,l)
          enddo
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mptranr4(mpicomm,mpisize,im,ida,idb,&
                    jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mptranr4    Transpose grid decompositions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram transposes an array of data from one
!   grid decomposition to another by using message passing.
!   The grid decompositions should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptranr4(mpicomm,mpisize,im,ida,idb,&
!                         jm,jma,jmb,jda,km,kma,kmb,kdb,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     im       integer(kint_mpi) undecomposed range
!     ida      integer(kint_mpi) undecomposed input dimension
!     idb      integer(kint_mpi) undecomposed output dimension
!     jm       integer(kint_mpi) output grid decomposition size
!     jma      integer(kint_mpi) input grid undecomposed range
!     jmb      integer(kint_mpi) output grid decomposed range
!     jda      integer(kint_mpi) input grid undecomposed dimension
!     km       integer(kint_mpi) input grid decomposition size
!     kma      integer(kint_mpi) input grid decomposed range
!     kmb      integer(kint_mpi) output grid undecomposed range
!     kdb      integer(kint_mpi) output grid undecomposed dimension
!     a        real(4) (ida,jda,kma) input array
!   Output argument list:
!     b        real(4) (idb,kdb,jmb) output array
!
! Subprograms called:
!   mpi_alltoall  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable transpose functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   This subprogram must be used rather than mpi_alltoall
!   in any of the following cases:
!     (a) The undecomposed range is less than the respective dimension
!         (either im.lt.ida or im.lt.idb)
!     (b) The decomposition size is greater than one
!         (either km.gt.1 or jm.gt.1)
!     (c) The decomposed range is ever zero
!         (either kma.eq.0 or jmb.eq.0 for any process)
!     (d) The output grid range is not the full extent
!         (either kmb.lt.mpisize or kmb.lt.kdb or jma.lt.mpisize or jma.lt.jda)
!   If none of these conditions apply, mpi_alltoall could be used directly
!   rather than this subprogram and would be more efficient.
!
!   Example 1. Transpose a 1000 x 10000 matrix.
!
!!! include 'mpif.h'                                       ! use mpi
!!! parameter(jt=1000,kt=10000)                            ! set problem size
!!! real,allocatable:: a(:,:),b(:,:)                       ! declare arrays
!!! call mpi_init(ierr)                                    ! initialize mpi
!!! call mpi_comm_rank(MPI_COMM_WORLD,mpirank,ierr)        ! get mpi rank
!!! call mpi_comm_size(MPI_COMM_WORLD,mpisize,ierr)        ! get mpi size
!!! call mptgen(mpirank,mpisize,1,1,jt,j1,j2,jx,jm,jn)     ! decompose output
!!! call mptgen(mpirank,mpisize,1,1,kt,k1,k2,kx,km,kn)     ! decompose input
!!! allocate(a(jt,k1:k2),b(kt,j1:j2))                      ! allocate arrays
!!! a=reshape((/((j+k,j=1,jt),k=k1,k2)/),(/jt,k2-k1+1/))   ! initialize input
!!! call mptranr4(MPI_COMM_WORLD,mpisize,1,1,1,            ! transpose arrays
!!! & jm,jt,j2-j1+1,jt,km,k2-k1+1,kt,kt,a,b)
!!! print '(2i8,f16.1)',((k,j,b(k,j),k=2000,kt,2000),      ! print some values
!!! & j=((j1-1)/200+1)*200,j2,200)
!!! call mpi_finalize(ierr)                                ! finalize mpi
!!! end
!
!   This transpose took 0.6 seconds on 4 2-way winterhawk nodes.
!   A 20000x10000 transpose took 3.4 seconds on 16 2-way winterhawk nodes.
!   Thus a transpose may take about 1 second for every 16 Mb per node.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize
  integer(kint_mpi),intent(in):: im,ida,idb
  integer(kint_mpi),intent(in):: jm,jma,jmb,jda
  integer(kint_mpi),intent(in):: km,kma,kmb,kdb
  real(4),dimension(ida,jda,kma),intent(in):: a
  real(4),dimension(idb,kdb,jmb),intent(out):: b
  real(4),dimension(im,jm,km,mpisize):: ta,tb
  integer(kint_mpi)::i,j,k,l,ierr,ja,kb
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose input array
!$omp parallel do private(ja)
  do l=1,mpisize
    do k=1,kma
      do j=1,jm
        ja=j+jm*(l-1)
        if(ja.le.jma) then
          do i=1,im
            ta(i,j,k,l)=a(i,ja,k)
          enddo
        endif
      enddo
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally transpose data
  call mpi_alltoall(ta,im*jm*km,MPI_REAL4,tb,im*jm*km,MPI_REAL4,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose output array
!$omp parallel do private(kb)
  do l=1,mpisize
    do k=1,km
      kb=k+km*(l-1)
      if(kb.le.kmb) then
        do j=1,jmb
          do i=1,im
            b(i,kb,j)=tb(i,j,k,l)
          enddo
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
!-------------------------------------------------------------------------------
subroutine mptrangr4(mpicomm,mpisize,ndi,ndj,ndk,im,isa,isb,&
                     jma,jsa,jmb,jsb,jm,jn,kma,ksa,kmb,ksb,km,kn,a,b)
!$$$  Subprogram documentation block
!
! Subprogram:    mptrangr4   Transpose grid decompositions
!   Prgmmr: Iredell          Org: W/NP23      Date: 1999-02-12
!
! Abstract: This subprogram transposes an array of data from one
!   grid decomposition to another by using message passing.
!   More than one dimension in the input or output array may be decomposed.
!   The respective grid decompositions should be generated by mptgen.
!
! Program history log:
!   1999-02-12  Mark Iredell
!
! Usage:    call mptrangr4(mpicomm,mpisize,ndi,ndj,ndk,im,isa,isb,&
!                          jma,jsa,jmb,jsb,jm,jn,kma,ksa,kmb,ksb,km,kn,a,b)
!   Input argument list:
!     mpicomm  integer(kint_mpi) mpi communicator
!     mpisize  integer(kint_mpi) number of processes (from mpi_comm_size)
!     ndi      integer(kint_mpi) number of undecomposed dimensions
!     ndj      integer(kint_mpi) number of dimensions decomposed in the output grid
!     ndk      integer(kint_mpi) number of dimensions decomposed in the input grid
!     im       integer(kint_mpi) (ndi) undecomposed dimension ranges
!     isa      integer(kint_mpi) (ndi) undecomposed dimension input grid skip values
!     isb      integer(kint_mpi) (ndi) undecomposed dimension output grid skip values
!     jma      integer(kint_mpi) (ndj) input grid undecomposed dimension ranges
!     jsa      integer(kint_mpi) (ndj) input grid undecomposed dimension skip values
!     jmb      integer(kint_mpi) (ndj) output grid decomposed dimension ranges
!     jsb      integer(kint_mpi) (ndj) output grid decomposed dimension skip values
!     jm       integer(kint_mpi) (ndj) output grid decomposition sizes
!     jn       integer(kint_mpi) (ndj) output grid decomposition divisions
!     kma      integer(kint_mpi) (ndk) input grid decomposed dimension ranges
!     ksa      integer(kint_mpi) (ndk) input grid decomposed dimension skip values
!     kmb      integer(kint_mpi) (ndk) output grid undecomposed dimension ranges
!     ksb      integer(kint_mpi) (ndk) output grid undecomposed dimension skip values
!     km       integer(kint_mpi) (ndk) input grid decomposition sizes
!     kn       integer(kint_mpi) (ndk) input grid decomposition divisions
!     a        real(4) (1+dot_product(im-1,isa)+&
!                         dot_product(jma-1,jsa)+&
!                         dot_product(kma-1,ksa)) input array
!   Output argument list:
!     b        real(4) (1+dot_product(im-1,isb)+&
!                         dot_product(jmb-1,jsb)+&
!                         dot_product(kmb-1,ksb)) output array
!
! Subprograms called:
!   mpi_alltoall  mpi exchange of data between every process pair
!
! Remarks:
!   While this routine serves a wide variety of scalable transpose functions
!   for multidimensional grids,
!     (a) it does not work with nonrectanguloid grids;
!     (b) it does not do any load balancing;
!     (c) it does not do any communication hiding.
!
!   Example 1. Transpose a 1000 x 10000 matrix.
!
!!! include 'mpif.h'                                       ! use mpi
!!! parameter(jt=1000,kt=10000)                            ! set problem size
!!! real,allocatable:: a(:,:),b(:,:)                       ! declare arrays
!!! call mpi_init(ierr)                                    ! initialize mpi
!!! call mpi_comm_rank(MPI_COMM_WORLD,mpirank,ierr)        ! get mpi rank
!!! call mpi_comm_size(MPI_COMM_WORLD,mpisize,ierr)        ! get mpi size
!!! call mptgen(mpirank,mpisize,1,1,jt,j1,j2,jx,jm,jn)     ! decompose output
!!! call mptgen(mpirank,mpisize,1,1,kt,k1,k2,kx,km,kn)     ! decompose input
!!! allocate(a(jt,k1:k2),b(kt,j1:j2))                      ! allocate arrays
!!! a=reshape((/((j+k,j=1,jt),k=k1,k2)/),(/jt,k2-k1+1/))   ! initialize input
!!! call mptrangr4(MPI_COMM_WORLD,mpisize,1,1,1,1,1,1,     ! transpose arrays
!!! & jt,1,j2-j1+1,kt,jm,jn,
!!! & k2-k1+1,jt,kt,1,km,kn,a,b)
!!! print '(2i8,f16.1)',((k,j,b(k,j),k=2000,kt,2000),      ! print some values
!!! & j=((j1-1)/200+1)*200,j2,200)
!!! call mpi_finalize(ierr)                                ! finalize mpi
!!! end
!
!   This transpose took 2.4 seconds on 4 2-way winterhawk nodes.
!   A 20000x10000 transpose took 12.5 seconds on 16 2-way winterhawk nodes.
!   Thus a transpose may take about 1 second for every 4 Mb per node.
!
! Attributes:
!   language: fortran 90
!
!$$$
  use machine,only:kint_mpi
  implicit none
  include 'mpif.h'
  integer(kint_mpi),intent(in):: mpicomm,mpisize,ndi,ndj,ndk
  integer(kint_mpi),dimension(ndi),intent(in):: im,isa,isb
  integer(kint_mpi),dimension(ndj),intent(in):: jma,jsa,jmb,jsb,jm,jn
  integer(kint_mpi),dimension(ndk),intent(in):: kma,ksa,kmb,ksb,km,kn
  real(4),dimension(1+dot_product(im-1,isa)+&
                      dot_product(jma-1,jsa)+&
                      dot_product(kma-1,ksa)),intent(in):: a
  real(4),dimension(1+dot_product(im-1,isb)+&
                      dot_product(jmb-1,jsb)+&
                      dot_product(kmb-1,ksb)),intent(out):: b
  real(4),dimension(product(im),product(jm),product(km),mpisize):: ta,tb
  integer(kint_mpi)::iq,jq,kq
  integer(kint_mpi)::ip(ndi),jpa(ndj),jpb(ndj),kpa(ndk),kpb(ndk)
  integer(kint_mpi)::ix(ndi),jx(ndj),kx(ndk)
  integer(kint_mpi)::i,j,k,l,ierr,ks,js,is
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  compute dimension products
  iq=product(im)
  jq=product(jm)
  kq=product(km)
  ip=(/(product(im(1:i-1)),i=1,ndi)/)
  jpa=(/(product(jm(1:i-1)*jn(1:i-1)),i=1,ndj)/)
  jpb=(/(product(jm(1:i-1)),i=1,ndj)/)
  kpa=(/(product(km(1:i-1)),i=1,ndk)/)
  kpb=(/(product(km(1:i-1)*kn(1:i-1)),i=1,ndk)/)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose input array
!$omp parallel do private(kx,ks,jx,js,ix,is)
  do l=1,mpisize
    do k=1,kq
      kx=mod((k-1)/kpa(1:ndk),km)
      if(all(kx.lt.kma)) then
        ks=1+dot_product(kx,ksa)
        do j=1,jq
          jx=mod((j+jq*(l-1)-1)/jpa(1:ndj),jm*jn)
          if(all(jx.lt.jma)) then
            js=ks+dot_product(jx,jsa)
            do i=1,iq
              ix=mod((i-1)/ip(1:ndi),im)
              is=js+dot_product(ix,isa)
              ta(i,j,k,l)=a(is)
            enddo
          endif
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  externally transpose data
  call mpi_alltoall(ta,iq*jq*kq,MPI_REAL4,tb,iq*jq*kq,MPI_REAL4,mpicomm,ierr)
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
!  internally transpose output array
!$omp parallel do private(kx,ks,jx,js,ix,is)
  do l=1,mpisize
    do k=1,kq
      kx=mod((k+kq*(l-1)-1)/kpb(1:ndk),km*kn)
      if(all(kx.lt.kmb)) then
        ks=1+dot_product(kx,ksb)
        do j=1,jq
          jx=mod((j-1)/jpb(1:ndj),jm)
          if(all(jx.lt.jmb)) then
            js=ks+dot_product(jx,jsb)
            do i=1,iq
              ix=mod((i-1)/ip(1:ndi),im)
              is=js+dot_product(ix,isb)
              b(is)=tb(i,j,k,l)
            enddo
          endif
        enddo
      endif
    enddo
  enddo
! - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
end subroutine
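!-------------------------------------------------------------------------------
! Example (illustrative sketch).  Transpose a set of nf fields on a jt x kt
! grid, decomposed in k on input and in j on output, using mptranr4 with an
! undecomposed leading dimension (im=nf); the names nf, jt and kt are
! arbitrary, and the decompositions come from mptgen as in the examples above.
!!! include 'mpif.h'                                       ! use mpi
!!! parameter(nf=3,jt=1000,kt=10000)                       ! set problem size
!!! real,allocatable:: a(:,:,:),b(:,:,:)                   ! declare arrays
!!! call mpi_init(ierr)                                    ! initialize mpi
!!! call mpi_comm_rank(MPI_COMM_WORLD,mpirank,ierr)        ! get mpi rank
!!! call mpi_comm_size(MPI_COMM_WORLD,mpisize,ierr)        ! get mpi size
!!! call mptgen(mpirank,mpisize,1,1,jt,j1,j2,jx,jm,jn)     ! decompose output
!!! call mptgen(mpirank,mpisize,1,1,kt,k1,k2,kx,km,kn)     ! decompose input
!!! allocate(a(nf,jt,k1:k2),b(nf,kt,j1:j2))                ! allocate arrays
!!! a=1.                                                   ! initialize input
!!! call mptranr4(MPI_COMM_WORLD,mpisize,nf,nf,nf,jm,jt,j2-j1+1,jt,km,k2-k1+1,kt,kt,a,b)
!!! call mpi_finalize(ierr)                                ! finalize mpi
!!! end
!-------------------------------------------------------------------------------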