MODULE mod_parallel ! !git $Id$ !svn $Id: mod_parallel.F 1151 2023-02-09 03:08:53Z arango $ !================================================== Hernan G. Arango === ! Copyright (c) 2002-2023 The ROMS/TOMS Group ! ! Licensed under a MIT/X style license ! ! See License_ROMS.md ! !======================================================================= ! ! ! This module contains all variables used for parallelization ! ! ! !======================================================================= ! USE mod_kinds ! implicit none ! include 'mpif.h' ! PUBLIC :: allocate_parallel PUBLIC :: deallocate_parallel PUBLIC :: initialize_parallel ! !----------------------------------------------------------------------- ! Define variables in module. !----------------------------------------------------------------------- ! ! Switch to identify master processor. In serial and shared-memory ! applications it is always true. ! logical :: Master ! ! Switch to identify which thread is processing input/output files. ! In distributed-memory applications, this thread can be the master ! thread or all threads in case of parallel output. In serial and ! shared-memory applications it is always true. ! logical :: InpThread logical :: OutThread !$OMP THREADPRIVATE (InpThread, OutThread) ! ! Number of shared-memory parallel threads or distributed-memory ! parallel nodes. ! integer :: numthreads integer :: ForkSize ! ! First and last tile to process in a tiled application. ! integer, allocatable :: first_tile(:) integer, allocatable :: last_tile(:) !$OMP THREADPRIVATE (first_tile, last_tile) ! ! Parallel nodes assined to the ocean model. ! integer :: peOCN_frst ! first ocean parallel node integer :: peOCN_last ! last ocean parallel node ! ! Parallel threads/nodes counters used in critical parallel regions. ! integer :: tile_count = 0 integer :: block_count = 0 integer :: thread_count = 0 ! ! Profiling variables as function of parallel thread: ! ! proc Parallel process ID. ! Cstr Starting time for program region. ! Cend Ending time for program region. ! Csum Accumulated time for progam region. ! Ctotal Total time profiled ! total_cpu Total elapsed CPU ! integer, allocatable :: proc(:,:,:) real(r8) :: Ctotal, total_cpu, total_model(4) real(r8), allocatable :: Cstr(:,:,:) real(r8), allocatable :: Cend(:,:,:) real(r8), allocatable :: Csum(:,:,:) !$OMP THREADPRIVATE (proc) !$OMP THREADPRIVATE (Cstr, Cend) ! ! Switch manage time clock in "mp_bcasts". During initialization is ! set to .FALSE. because the profiling variables cannot be allocated ! and initialized before the "Ngrids" parameter is known. ! logical :: Lwclock = .FALSE. ! ! Distributed-memory master process. ! integer :: MyMaster = 0 ! ! Rank of the parallel local process. ! integer :: FullRank = 0 integer :: TaskRank = 0 integer :: MyRank = 0 integer :: MyThread = 0 !$OMP THREADPRIVATE (MyThread) ! ! Distributed-memory group communicator handles. ! integer :: OCN_COMM_WORLD ! internal ROMS communicator ! ! Set mpi_info opaque object handle. ! integer :: MP_INFO = MPI_INFO_NULL ! ! Type of message-passage floating point bindings. ! integer, parameter :: MP_FLOAT = MPI_DOUBLE_PRECISION integer, parameter :: MP_DOUBLE = MPI_DOUBLE_PRECISION ! CONTAINS ! SUBROUTINE allocate_parallel (Ngrids) ! !======================================================================= ! ! ! This routine allocates several variables in the module that depend ! ! on the number of nested grids. ! ! ! !======================================================================= ! USE mod_strings, ONLY: Nregion ! ! Imported variable declarations. ! integer, intent(in) :: Ngrids ! !----------------------------------------------------------------------- ! Allocate and initialize module variables. !----------------------------------------------------------------------- ! !$OMP PARALLEL ! First and last tile to process in a tiled application. ! IF (.not.allocated(first_tile)) THEN allocate ( first_tile(Ngrids) ) END IF IF (.not.allocated(last_tile)) THEN allocate ( last_tile(Ngrids) ) END IF ! ! Time profiling variables. ! IF (.not.allocated(proc)) THEN allocate ( proc(0:1,4,Ngrids) ) proc(0:1,1:4,1:Ngrids)=0 END IF IF (.not.allocated(Cstr)) THEN allocate ( Cstr(0:Nregion,4,Ngrids) ) Cstr(0:Nregion,1:4,1:Ngrids)=0.0_r8 END IF IF (.not.allocated(Cend)) THEN allocate ( Cend(0:Nregion,4,Ngrids) ) Cend(0:Nregion,1:4,1:Ngrids)=0.0_r8 END IF !$OMP END PARALLEL IF (.not.allocated(Csum)) THEN allocate ( Csum(0:Nregion,4,Ngrids) ) Csum(0:Nregion,1:4,1:Ngrids)=0.0_r8 END IF ! ! Initialize other profiling variables. ! Ctotal=0.0_r8 total_cpu=0.0_r8 total_model=0.0_r8 ! ! Activate wall clock switch used only in "mp_bcasts". This switch ! is set to .FALSE. during initialization before calling "inp_par.F" ! because the above profiling variables are allocated and initialized ! after the value of "Ngrids" is known. ! Lwclock=.TRUE. RETURN END SUBROUTINE allocate_parallel ! SUBROUTINE deallocate_parallel ! !======================================================================= ! ! ! This routine deallocates variables in the module. ! ! ! !======================================================================= ! !----------------------------------------------------------------------- ! Deallocate variables in module. !----------------------------------------------------------------------- ! !$OMP PARALLEL IF (allocated(first_tile)) deallocate ( first_tile ) IF (allocated(last_tile)) deallocate ( last_tile ) ! IF (allocated(proc)) deallocate ( proc ) IF (allocated(Cstr)) deallocate ( Cstr ) IF (allocated(Cend)) deallocate ( Cend ) !$OMP END PARALLEL IF (allocated(Csum)) deallocate ( Csum ) ! RETURN END SUBROUTINE deallocate_parallel ! SUBROUTINE initialize_parallel ! !======================================================================= ! ! ! This routine initializes and spawn distribute-memory nodes. ! ! ! !======================================================================= ! USE mod_iounits USE mod_scalars ! ! Local variable declarations. ! integer :: i integer :: MyError ! !----------------------------------------------------------------------- ! Initialize distributed-memory (1) configuration. !----------------------------------------------------------------------- ! ! Get the number of processes in the group associated with the world ! communicator. Here FullRank and MyRank are the same. It is computed ! for consistency wity concurrent applications. ! CALL mpi_comm_size (OCN_COMM_WORLD, numthreads, MyError) IF (MyError.ne.0) THEN WRITE (stdout,10) 10 FORMAT (/,' ROMS/TOMS - Unable to inquire number of', & & ' processors in the group.') exit_flag=6 RETURN END IF CALL mpi_comm_rank (OCN_COMM_WORLD, FullRank, MyError) ForkSize=numthreads ! ! Identify master, input and output threads. ! Master=.FALSE. InpThread=.FALSE. OutThread=.FALSE. IF (MyRank.eq.MyMaster) THEN Master=.TRUE. InpThread=.TRUE. OutThread=.TRUE. END IF ! RETURN END SUBROUTINE initialize_parallel END MODULE mod_parallel