# 1 "../memutils/memutils.F90" !*********************************************************************** !* GNU Lesser General Public License !* !* This file is part of the GFDL Flexible Modeling System (FMS). !* !* FMS is free software: you can redistribute it and/or modify it under !* the terms of the GNU Lesser General Public License as published by !* the Free Software Foundation, either version 3 of the License, or (at !* your option) any later version. !* !* FMS is distributed in the hope that it will be useful, but WITHOUT !* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or !* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License !* for more details. !* !* You should have received a copy of the GNU Lesser General Public !* License along with FMS. If not, see . !*********************************************************************** module memutils_mod !Author: Balaji (V.Balaji@noaa.gov) !Various operations for memory management !these currently include efficient methods for memory-to-memory copy !including strided data and arbitrary gather-scatter vectors !also various memory and cache inquiry operators implicit none private # 29 integer(kind=8) :: l1_cache_line_size, l1_cache_size, l1_associativity integer(kind=8) :: l2_cache_line_size, l2_cache_size, l2_associativity logical :: memutils_initialized=.FALSE. interface memcpy module procedure memcpy_r8 module procedure memcpy_r8_gather module procedure memcpy_r8_scatter module procedure memcpy_r8_gather_scatter end interface public :: get_l1_cache_line, get_l2_cache_line, memcpy, memutils_init public :: print_memuse_stats # 47 # 50 logical, private :: print_memory_usage=.FALSE. contains subroutine memutils_init(print_flag) !initialize memutils module !currently sets default cache characteristics !(will provide overrides later) !also sets pe to my_pe on t3e logical, optional :: print_flag # 68 !defaults l1_cache_line_size = 1 l1_cache_size = 1 l1_associativity = 1 l2_cache_line_size = 1 l2_cache_size = 1 l2_associativity = 1 # 79 if( PRESENT(print_flag) )print_memory_usage = print_flag memutils_initialized = .TRUE. return end subroutine memutils_init !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! ! !MEMCPY routines: real*8 words are copied from RHS to LHS ! ! Either side can have constant stride (lhs_stride, rhs_stride) ! ! or indexed by a gather/scatter array (lhs_indx, rhs_indx) ! ! index arrays are 0-based (i.e C-like not fortran-like: this is ! ! for compatibility with the SHMEM_IXGET/PUT routines) ! ! ! ! EXAMPLES: ! ! ! !Replace ! ! a(0:n-1) = b(0:n-1) ! !with ! ! call memcpy(a,b,n) ! ! ! !Replace ! ! a(0:2*n-1:2) = b(0:3*n-1:3) ! !with ! ! call memcpy(a,b,dim,n,2,3) !dim.GE.3*n ! ! ! !Replace ! ! a(0:n-1) = b(indx(1:n)) ! !with ! ! call memcpy(a,b,dim,n,1,indx) !dim.GE.max(indx) ! ! ! !Replace ! ! a(indx(1:n)) = b(0:n-1) ! !with ! ! call memcpy(a,b,dim,n,indx,1) !dim.GE.max(indx) ! ! ! !Replace ! ! a(indxa(1:n)) = b(indxb(1:n)) ! !with ! ! call memcpy(a,b,dim,n,indx,indxb) !dim.GE.max(indxa,indxb) ! ! ! ! There are no error checks!!! (routines are built for speed) ! ! Specifically there is no bounds-checking: if the stride or ! ! indexing causes you to exceed you will have done a ! ! potentially unsafe memory load ! ! ! !T3E: we use the shmem routines on-processor to effect the transfer ! ! via the (faster) E-registers ! ! ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! subroutine memcpy_r8( lhs, rhs, dim, nelems, lhs_stride, rhs_stride ) !base routine: handles constant stride memcpy !default strides are of course 1 integer, intent(in) :: dim real(kind=8), dimension(0:dim-1), intent(in) :: rhs real(kind=8), dimension(0:dim-1), intent(out) :: lhs integer, intent(in), optional :: nelems, lhs_stride, rhs_stride integer :: n, rs, ls !defaults n = dim ls = 1 rs = 1 if( PRESENT(nelems) )then n = nelems !only check for stride if nelems is present if( PRESENT(lhs_stride) )ls = lhs_stride if( PRESENT(rhs_stride) )rs = rhs_stride endif if( ls.EQ.1 .AND. rs.EQ.1 )then # 151 lhs(0:n-1) = rhs(0:n-1) else # 157 lhs(0:n*ls-1:ls) = rhs(0:n*rs-1:rs) endif return end subroutine memcpy_r8 subroutine memcpy_r8_gather( lhs, rhs, dim, nelems, lhs_stride, rhs_indx ) !memcpy routine with gather: copies nelems words from rhs(indx(:)) to lhs(:) integer, intent(in) :: dim, nelems, lhs_stride real(kind=8), dimension(0:dim-1), intent(in) :: rhs real(kind=8), dimension(0:dim-1), intent(out) :: lhs integer, intent(in), dimension(nelems) :: rhs_indx # 180 lhs(0:nelems*lhs_stride-1:lhs_stride) = rhs(rhs_indx(1:nelems)) return end subroutine memcpy_r8_gather subroutine memcpy_r8_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_stride ) !memcpy routine with scatter: copies nelems words from rhs(:) to lhs(indx(:)) integer, intent(in) :: dim, nelems, rhs_stride real(kind=8), dimension(0:dim-1), intent(in) :: rhs real(kind=8), dimension(0:dim-1), intent(out) :: lhs integer, intent(in), dimension(nelems) :: lhs_indx # 203 lhs(lhs_indx(1:nelems)) = rhs(0:nelems*rhs_stride-1:rhs_stride) return end subroutine memcpy_r8_scatter subroutine memcpy_r8_gather_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_indx ) !memcpy routine with gather/scatter: copies nelems words from rhs(indx(:)) to lhs(indx(:)) integer, intent(in) :: dim, nelems real(kind=8), dimension(0:dim-1), intent(in) :: rhs real(kind=8), dimension(0:dim-1), intent(out) :: lhs integer, intent(in), dimension(nelems) :: lhs_indx, rhs_indx # 222 lhs(lhs_indx(1:nelems)) = rhs(rhs_indx(1:nelems)) return end subroutine memcpy_r8_gather_scatter # 244 # 262 !cache utilities: need to write version for other argument types function get_l1_cache_line(a) integer(kind=8) :: get_l1_cache_line real, intent(in) :: a integer(kind=8) :: i i = LOC(a) get_l1_cache_line = mod(i,l1_cache_size/l1_associativity)/l1_cache_line_size end function get_l1_cache_line function get_l2_cache_line(a) integer(kind=8) :: get_l2_cache_line real, intent(in) :: a integer(kind=8) :: i i = LOC(a) get_l2_cache_line = mod(i,l2_cache_size/l2_associativity)/l2_cache_line_size end function get_l2_cache_line subroutine print_memuse_stats( text, unit, always ) use mpp_mod, only: mpp_pe, mpp_root_pe, mpp_npes, mpp_min, mpp_max, mpp_sum, stderr character(len=*), intent(in) :: text integer, intent(in), optional :: unit logical, intent(in), optional :: always real :: m, mmin, mmax, mavg, mstd integer :: mu !memuse is an external function: works on SGI !use #ifdef to generate equivalent on other platforms. integer :: memuse !default integer OK? character(len=8) :: walldate character(len=10) :: walltime character(len=5) :: wallzone integer :: wallvalues(8) if( PRESENT(always) )then if( .NOT.always )return else if( .NOT.print_memory_usage )return end if mu = stderr(); if( PRESENT(unit) )mu = unit # 304 call mem_dump(m) mmin = m; call mpp_min(mmin) mmax = m; call mpp_max(mmax) mavg = m; call mpp_sum(mavg); mavg = mavg/mpp_npes() mstd = (m-mavg)**2; call mpp_sum(mstd); mstd = sqrt( mstd/mpp_npes() ) if( mpp_pe().EQ.mpp_root_pe() ) then call DATE_AND_TIME(walldate, walltime, wallzone, wallvalues) write( mu,'(a84,4es11.3)' ) trim(walldate)//' '//trim(walltime)//& ': Memuse(MB) at '//trim(text)//'=', mmin, mmax, mstd, mavg endif return end subroutine print_memuse_stats !####################################################################### subroutine mem_dump ( memuse ) use mpp_mod, only : stdout use mpp_io_mod, only : mpp_open, mpp_close, mpp_ascii, mpp_rdonly, & mpp_sequential, mpp_single real, intent(out) :: memuse ! This routine returns the memory usage on Linux systems. ! It does this by querying a system file (file_name below). ! It is intended for use by print_memuse_stats above. character(len=32) :: file_name = '/proc/self/status' character(len=32) :: string integer, save :: mem_unit = -1 real :: multiplier memuse = 0.0 multiplier = 1.0 if(mem_unit == -1) then call mpp_open ( mem_unit, file_name, & form=MPP_ASCII, action=MPP_RDONLY, & access=MPP_SEQUENTIAL, threading=MPP_SINGLE ) else rewind(mem_unit) endif do; read (mem_unit,'(a)', end=10) string if ( INDEX ( string, 'VmHWM:' ) == 1 ) then read (string(7:LEN_TRIM(string)-2),*) memuse exit endif enddo if (TRIM(string(LEN_TRIM(string)-1:)) == "kB" ) & multiplier = 1.0/1024. ! Convert from kB to MB !10 call mpp_close ( mem_unit ) 10 memuse = memuse * multiplier return end subroutine mem_dump end module memutils_mod