!***********************************************************************
!*                   GNU Lesser General Public License
!*
!* This file is part of the GFDL Flexible Modeling System (FMS).
!*
!* FMS is free software: you can redistribute it and/or modify it under
!* the terms of the GNU Lesser General Public License as published by
!* the Free Software Foundation, either version 3 of the License, or (at
!* your option) any later version.
!*
!* FMS is distributed in the hope that it will be useful, but WITHOUT
!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
!* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
!* for more details.
!*
!* You should have received a copy of the GNU Lesser General Public
!* License along with FMS. If not, see <http://www.gnu.org/licenses/>.
!***********************************************************************

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!                                                                             !
!                               MPP_TRANSMIT                                  !
!                                                                             !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    subroutine MPP_TRANSMIT_( put_data, put_len, to_pe, get_data, get_len, from_pe, &
                              block, tag, recv_request, send_request )
!a message-passing routine intended to be reminiscent equally of both MPI and SHMEM

!put_data and get_data are contiguous MPP_TYPE_ arrays

!at each call, your put_data array is put to   to_pe's get_data
!              your get_data array is got from from_pe's put_data
!i.e. we assume that typically (e.g. when updating halo regions) each PE performs a put _and_ a get

!special PE designations:
!      NULL_PE: to disable a put or a get (e.g. at boundaries)
!      ANY_PE:  if the remote PE for the put or get is left unspecified
!      ALL_PES: broadcast and collect operations (collect not yet implemented)

!ideally we would not pass length, but this f77-style call performs better (arrays passed by address, not descriptor)
!further, this permits contiguous words from an array of any rank to be passed (avoiding the f90 rank conformance check)

!the caller is responsible for completion checks (mpp_sync_self) before and after
!(a commented usage sketch follows the declarations below)

      integer, intent(in) :: put_len, to_pe, get_len, from_pe
      MPP_TYPE_, intent(in)  :: put_data(*)
      MPP_TYPE_, intent(out) :: get_data(*)
      logical, intent(in),  optional :: block
      integer, intent(in),  optional :: tag
      integer, intent(out), optional :: recv_request, send_request
      logical :: block_comm
      integer :: i
      MPP_TYPE_, allocatable, save :: local_data(:) !local copy used by non-parallel code (no SHMEM or MPI)
      integer :: comm_tag
      integer :: rsize
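!The commented sketch below illustrates the paired put/get usage described
!above, via the generic mpp_transmit interface rather than this type-specific
!body. It is an illustrative assumption, not part of the library: nhalo,
!pe_left and pe_right are placeholder names for the caller's halo length and
!neighbouring PEs (pass NULL_PE for either side at a closed boundary).
!
!      real(8) :: halo_out(nhalo), halo_in(nhalo)   !contiguous buffers, as required above
!      !send my halo to the right neighbour while receiving from the left one
!      call mpp_transmit( put_data=halo_out, put_len=nhalo, to_pe=pe_right, &
!                         get_data=halo_in,  get_len=nhalo, from_pe=pe_left )
!      call mpp_sync_self()   !completion check before halo_out is reused (see note above)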
      if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_TRANSMIT: You must first call mpp_init.' )

      if( to_pe.EQ.NULL_PE .AND. from_pe.EQ.NULL_PE )return

      block_comm = .true.
      if(PRESENT(block)) block_comm = block

      if( debug )then
          call SYSTEM_CLOCK(tick)
          write( stdout_unit,'(a,i18,a,i6,a,2i6,2i8)' )&
               'T=',tick, ' PE=',pe, ' MPP_TRANSMIT begin: to_pe, from_pe, put_len, get_len=', to_pe, from_pe, put_len, get_len
      end if

      comm_tag = DEFAULT_TAG
      if(present(tag)) comm_tag = tag

!do put first and then get
      if( to_pe.GE.0 .AND. to_pe.LT.npes )then !use non-blocking sends
          if( debug .and. (current_clock.NE.0) )call SYSTEM_CLOCK(start_tick)
          !z1l: truly non-blocking send.
!         if( request(to_pe).NE.MPI_REQUEST_NULL )then !only one message from pe->to_pe in queue
!             if( debug )write( stderr(),* )'PE waiting for sending', pe, to_pe
!             call MPI_WAIT( request(to_pe), stat, error )
!         end if
          if(present(send_request)) then
             call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, send_request, error)
          else
             cur_send_request = cur_send_request + 1
             if( cur_send_request > max_request ) call mpp_error(FATAL, &
                  "MPP_TRANSMIT: cur_send_request is greater than max_request, increase mpp_nml request_multiply")
             call MPI_ISEND( put_data, put_len, MPI_TYPE_, to_pe, comm_tag, mpp_comm_private, &
                             request_send(cur_send_request), error)
          endif
          if( debug .and. (current_clock.NE.0) )call increment_current_clock( EVENT_SEND, put_len*MPP_TYPE_BYTELEN_ )

      else if( to_pe.EQ.ALL_PES )then !this is a broadcast from from_pe
          if( from_pe.LT.0 .OR. from_pe.GE.npes )call mpp_error( FATAL, 'MPP_TRANSMIT: broadcasting from invalid PE.' )
          if( put_len.GT.get_len )call mpp_error( FATAL, 'MPP_TRANSMIT: size mismatch between put_data and get_data.' )
          if( pe.EQ.from_pe )then
              if( LOC(get_data).NE.LOC(put_data) )then
                  !dir$ IVDEP
                  do i = 1,get_len
                     get_data(i) = put_data(i)
                  end do
              end if
          end if
          call mpp_broadcast( get_data, get_len, from_pe )
          return

      else if( to_pe.EQ.ANY_PE )then !we don't have a destination to do puts to, so only do gets
          !...but you cannot have a pure get with MPI
          call mpp_error( FATAL, 'MPP_TRANSMIT: you cannot transmit to ANY_PE using MPI.' )

      else if( to_pe.NE.NULL_PE )then !no other valid cases except NULL_PE
          call mpp_error( FATAL, 'MPP_TRANSMIT: invalid to_pe.' )
      end if

!do the get: for libSMA, a get means do a wait to ensure put on remote PE is complete
      if( from_pe.GE.0 .AND. from_pe.LT.npes )then !receive from from_pe
          if( debug .and. (current_clock.NE.0) )call SYSTEM_CLOCK(start_tick)
          if( block_comm ) then
             call MPI_RECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, stat, error )
             call MPI_GET_COUNT( stat, MPI_TYPE_, rsize, error)
             if(rsize .NE. get_len) then
                print*, "rsize, get_len=", rsize, get_len, mpp_pe(), from_pe
                call mpp_error(FATAL, "MPP_TRANSMIT: get_len does not match size of data received")
             endif
          else
!            if( request_recv(from_pe).NE.MPI_REQUEST_NULL )then !only one message from from_pe->pe in queue
!                if( debug )write( stderr(),* )'PE waiting for receiving', pe, from_pe
!                call MPI_WAIT( request_recv(from_pe), stat, error )
!            end if
             if(PRESENT(recv_request)) then
                call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
                                recv_request, error )
             else
                cur_recv_request = cur_recv_request + 1
                if( cur_recv_request > max_request ) call mpp_error(FATAL, &
                     "MPP_TRANSMIT: cur_recv_request is greater than max_request, increase mpp_nml request_multiply")
                call MPI_IRECV( get_data, get_len, MPI_TYPE_, from_pe, comm_tag, mpp_comm_private, &
                                request_recv(cur_recv_request), error )
                size_recv(cur_recv_request) = get_len
                type_recv(cur_recv_request) = MPI_TYPE_
             endif
          endif
          if( debug .and. (current_clock.NE.0) )call increment_current_clock( EVENT_RECV, get_len*MPP_TYPE_BYTELEN_ )

      else if( from_pe.EQ.ANY_PE )then !receive from MPI_ANY_SOURCE
          if( debug .and. (current_clock.NE.0) )call SYSTEM_CLOCK(start_tick)
          call MPI_RECV( get_data, get_len, MPI_TYPE_, MPI_ANY_SOURCE, comm_tag, mpp_comm_private, stat, error )
          if( debug .and. (current_clock.NE.0) )call increment_current_clock( EVENT_RECV, get_len*MPP_TYPE_BYTELEN_ )

      else if( from_pe.EQ.ALL_PES )then
          call mpp_error( FATAL, 'MPP_TRANSMIT: from_pe=ALL_PES has ambiguous meaning, and hence is not implemented.' )

      else if( from_pe.NE.NULL_PE )then !only remaining valid choice is NULL_PE
          call mpp_error( FATAL, 'MPP_TRANSMIT: invalid from_pe.' )
      end if

      if( debug )then
          call SYSTEM_CLOCK(tick)
          write( stdout_unit,'(a,i18,a,i6,a,2i6,2i8)' )&
               'T=',tick, ' PE=',pe, ' MPP_TRANSMIT end: to_pe, from_pe, put_len, get_len=', to_pe, from_pe, put_len, get_len
      end if

      return
    end subroutine MPP_TRANSMIT_

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!                                                                             !
!                              MPP_BROADCAST                                  !
!                                                                             !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    subroutine MPP_BROADCAST_( data, length, from_pe, pelist )
!this call was originally bundled in with mpp_transmit, but that doesn't allow
!broadcast to a subset of PEs. This version will, and mpp_transmit will remain
!backward compatible.
      MPP_TYPE_, intent(inout) :: data(*)
      integer, intent(in) :: length, from_pe
      integer, intent(in), optional :: pelist(:)
      integer :: n, i, from_rank
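!The commented sketch below shows a typical call through the generic
!mpp_broadcast interface: the root PE fills a buffer and every PE in the
!(optional) pelist receives a copy. It is illustrative only; nvals and
!bcast_pelist are placeholder names assumed for the example, not part of
!this routine.
!
!      real(8) :: buf(nvals)
!      if( mpp_pe().EQ.mpp_root_pe() ) call random_number(buf)   !only the root PE fills the data
!      call mpp_broadcast( buf, nvals, mpp_root_pe(), pelist=bcast_pelist )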
      if( .NOT.module_is_initialized )call mpp_error( FATAL, 'MPP_BROADCAST: You must first call mpp_init.' )
      n = get_peset(pelist); if( peset(n)%count.EQ.1 )return

      if( debug )then
          call SYSTEM_CLOCK(tick)
          write( stdout_unit,'(a,i18,a,i6,a,2i6,2i8)' )&
               'T=',tick, ' PE=',pe, ' MPP_BROADCAST begin: from_pe, length=', from_pe, length
      end if

      if( .NOT.ANY(from_pe.EQ.peset(current_peset_num)%list) ) &
           call mpp_error( FATAL, 'MPP_BROADCAST: broadcasting from invalid PE.' )

      if( debug .and. (current_clock.NE.0) )call SYSTEM_CLOCK(start_tick)

! find the rank of from_pe in the pelist.
      do i = 1, mpp_npes()
         if(peset(n)%list(i) == from_pe) then
            from_rank = i - 1
            exit
         endif
      enddo
      if( mpp_npes().GT.1 )call MPI_BCAST( data, length, MPI_TYPE_, from_rank, peset(n)%id, error )
      if( debug .and. (current_clock.NE.0) )call increment_current_clock( EVENT_BROADCAST, length*MPP_TYPE_BYTELEN_ )

      return
    end subroutine MPP_BROADCAST_

!####################################################################################

#include