# 1 "../memutils/memutils.F90"
!***********************************************************************
!* GNU Lesser General Public License
!*
!* This file is part of the GFDL Flexible Modeling System (FMS).
!*
!* FMS is free software: you can redistribute it and/or modify it under
!* the terms of the GNU Lesser General Public License as published by
!* the Free Software Foundation, either version 3 of the License, or (at
!* your option) any later version.
!*
!* FMS is distributed in the hope that it will be useful, but WITHOUT
!* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
!* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
!* for more details.
!*
!* You should have received a copy of the GNU Lesser General Public
!* License along with FMS. If not, see .
!***********************************************************************
module memutils_mod
!Author: Balaji (V.Balaji@noaa.gov)
!Various operations for memory management
!these currently include efficient methods for memory-to-memory copy
!including strided data and arbitrary gather-scatter vectors
!also various memory and cache inquiry operators
implicit none
private
# 29
integer(kind=8) :: l1_cache_line_size, l1_cache_size, l1_associativity
integer(kind=8) :: l2_cache_line_size, l2_cache_size, l2_associativity
logical :: memutils_initialized=.FALSE.
interface memcpy
module procedure memcpy_r8
module procedure memcpy_r8_gather
module procedure memcpy_r8_scatter
module procedure memcpy_r8_gather_scatter
end interface
public :: get_l1_cache_line, get_l2_cache_line, memcpy, memutils_init
public :: print_memuse_stats
# 47
# 50
logical, private :: print_memory_usage=.FALSE.
contains
subroutine memutils_init(print_flag)
!initialize memutils module
!currently sets default cache characteristics
!(will provide overrides later)
!also sets pe to my_pe on t3e
logical, optional :: print_flag
# 68
!defaults
l1_cache_line_size = 1
l1_cache_size = 1
l1_associativity = 1
l2_cache_line_size = 1
l2_cache_size = 1
l2_associativity = 1
# 79
if( PRESENT(print_flag) )print_memory_usage = print_flag
memutils_initialized = .TRUE.
return
end subroutine memutils_init
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! !
!MEMCPY routines: real*8 words are copied from RHS to LHS !
! Either side can have constant stride (lhs_stride, rhs_stride) !
! or indexed by a gather/scatter array (lhs_indx, rhs_indx) !
! index arrays are 0-based (i.e C-like not fortran-like: this is !
! for compatibility with the SHMEM_IXGET/PUT routines) !
! !
! EXAMPLES: !
! !
!Replace !
! a(0:n-1) = b(0:n-1) !
!with !
! call memcpy(a,b,n) !
! !
!Replace !
! a(0:2*n-1:2) = b(0:3*n-1:3) !
!with !
! call memcpy(a,b,dim,n,2,3) !dim.GE.3*n !
! !
!Replace !
! a(0:n-1) = b(indx(1:n)) !
!with !
! call memcpy(a,b,dim,n,1,indx) !dim.GE.max(indx) !
! !
!Replace !
! a(indx(1:n)) = b(0:n-1) !
!with !
! call memcpy(a,b,dim,n,indx,1) !dim.GE.max(indx) !
! !
!Replace !
! a(indxa(1:n)) = b(indxb(1:n)) !
!with !
! call memcpy(a,b,dim,n,indx,indxb) !dim.GE.max(indxa,indxb) !
! !
! There are no error checks!!! (routines are built for speed) !
! Specifically there is no bounds-checking: if the stride or !
! indexing causes you to exceed you will have done a !
! potentially unsafe memory load !
! !
!T3E: we use the shmem routines on-processor to effect the transfer !
! via the (faster) E-registers !
! !
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
subroutine memcpy_r8( lhs, rhs, dim, nelems, lhs_stride, rhs_stride )
!base routine: handles constant stride memcpy
!default strides are of course 1
integer, intent(in) :: dim
real(kind=8), dimension(0:dim-1), intent(in) :: rhs
real(kind=8), dimension(0:dim-1), intent(out) :: lhs
integer, intent(in), optional :: nelems, lhs_stride, rhs_stride
integer :: n, rs, ls
!defaults
n = dim
ls = 1
rs = 1
if( PRESENT(nelems) )then
n = nelems
!only check for stride if nelems is present
if( PRESENT(lhs_stride) )ls = lhs_stride
if( PRESENT(rhs_stride) )rs = rhs_stride
endif
if( ls.EQ.1 .AND. rs.EQ.1 )then
# 151
lhs(0:n-1) = rhs(0:n-1)
else
# 157
lhs(0:n*ls-1:ls) = rhs(0:n*rs-1:rs)
endif
return
end subroutine memcpy_r8
subroutine memcpy_r8_gather( lhs, rhs, dim, nelems, lhs_stride, rhs_indx )
!memcpy routine with gather: copies nelems words from rhs(indx(:)) to lhs(:)
integer, intent(in) :: dim, nelems, lhs_stride
real(kind=8), dimension(0:dim-1), intent(in) :: rhs
real(kind=8), dimension(0:dim-1), intent(out) :: lhs
integer, intent(in), dimension(nelems) :: rhs_indx
# 180
lhs(0:nelems*lhs_stride-1:lhs_stride) = rhs(rhs_indx(1:nelems))
return
end subroutine memcpy_r8_gather
subroutine memcpy_r8_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_stride )
!memcpy routine with scatter: copies nelems words from rhs(:) to lhs(indx(:))
integer, intent(in) :: dim, nelems, rhs_stride
real(kind=8), dimension(0:dim-1), intent(in) :: rhs
real(kind=8), dimension(0:dim-1), intent(out) :: lhs
integer, intent(in), dimension(nelems) :: lhs_indx
# 203
lhs(lhs_indx(1:nelems)) = rhs(0:nelems*rhs_stride-1:rhs_stride)
return
end subroutine memcpy_r8_scatter
subroutine memcpy_r8_gather_scatter( lhs, rhs, dim, nelems, lhs_indx, rhs_indx )
!memcpy routine with gather/scatter: copies nelems words from rhs(indx(:)) to lhs(indx(:))
integer, intent(in) :: dim, nelems
real(kind=8), dimension(0:dim-1), intent(in) :: rhs
real(kind=8), dimension(0:dim-1), intent(out) :: lhs
integer, intent(in), dimension(nelems) :: lhs_indx, rhs_indx
# 222
lhs(lhs_indx(1:nelems)) = rhs(rhs_indx(1:nelems))
return
end subroutine memcpy_r8_gather_scatter
# 244
# 262
!cache utilities: need to write version for other argument types
function get_l1_cache_line(a)
integer(kind=8) :: get_l1_cache_line
real, intent(in) :: a
integer(kind=8) :: i
i = LOC(a)
get_l1_cache_line = mod(i,l1_cache_size/l1_associativity)/l1_cache_line_size
end function get_l1_cache_line
function get_l2_cache_line(a)
integer(kind=8) :: get_l2_cache_line
real, intent(in) :: a
integer(kind=8) :: i
i = LOC(a)
get_l2_cache_line = mod(i,l2_cache_size/l2_associativity)/l2_cache_line_size
end function get_l2_cache_line
subroutine print_memuse_stats( text, unit, always )
use mpp_mod, only: mpp_pe, mpp_root_pe, mpp_npes, mpp_min, mpp_max, mpp_sum, stderr
character(len=*), intent(in) :: text
integer, intent(in), optional :: unit
logical, intent(in), optional :: always
real :: m, mmin, mmax, mavg, mstd
integer :: mu
!memuse is an external function: works on SGI
!use #ifdef to generate equivalent on other platforms.
integer :: memuse !default integer OK?
character(len=8) :: walldate
character(len=10) :: walltime
character(len=5) :: wallzone
integer :: wallvalues(8)
if( PRESENT(always) )then
if( .NOT.always )return
else
if( .NOT.print_memory_usage )return
end if
mu = stderr(); if( PRESENT(unit) )mu = unit
# 304
call mem_dump(m)
mmin = m; call mpp_min(mmin)
mmax = m; call mpp_max(mmax)
mavg = m; call mpp_sum(mavg); mavg = mavg/mpp_npes()
mstd = (m-mavg)**2; call mpp_sum(mstd); mstd = sqrt( mstd/mpp_npes() )
if( mpp_pe().EQ.mpp_root_pe() ) then
call DATE_AND_TIME(walldate, walltime, wallzone, wallvalues)
write( mu,'(a84,4es11.3)' ) trim(walldate)//' '//trim(walltime)//&
': Memuse(MB) at '//trim(text)//'=', mmin, mmax, mstd, mavg
endif
return
end subroutine print_memuse_stats
!#######################################################################
subroutine mem_dump ( memuse )
use mpp_mod, only : stdout
use mpp_io_mod, only : mpp_open, mpp_close, mpp_ascii, mpp_rdonly, &
mpp_sequential, mpp_single
real, intent(out) :: memuse
! This routine returns the memory usage on Linux systems.
! It does this by querying a system file (file_name below).
! It is intended for use by print_memuse_stats above.
character(len=32) :: file_name = '/proc/self/status'
character(len=32) :: string
integer, save :: mem_unit = -1
real :: multiplier
memuse = 0.0
multiplier = 1.0
if(mem_unit == -1) then
call mpp_open ( mem_unit, file_name, &
form=MPP_ASCII, action=MPP_RDONLY, &
access=MPP_SEQUENTIAL, threading=MPP_SINGLE )
else
rewind(mem_unit)
endif
do; read (mem_unit,'(a)', end=10) string
if ( INDEX ( string, 'VmHWM:' ) == 1 ) then
read (string(7:LEN_TRIM(string)-2),*) memuse
exit
endif
enddo
if (TRIM(string(LEN_TRIM(string)-1:)) == "kB" ) &
multiplier = 1.0/1024. ! Convert from kB to MB
!10 call mpp_close ( mem_unit )
10 memuse = memuse * multiplier
return
end subroutine mem_dump
end module memutils_mod