module raflib !$$$ module documentation block ! . . . . ! module: raflib contains anisotropic filter routines ! prgmmr: parrish org: np22 date: 2005-11-30 ! ! abstract: contains routines for initializing and applying anisotropic ! recursive filter. ! ! program history log: ! 2005-11-30 parrish ! 2005-11-30 parrish -- replace blended triad routine gettri4 with ! newer version provided by Jim Purser. ! 2011-07-04 todling - fix mpi call to run either single or double prec ! ! subroutines included: ! sub set_indices - ! sub init_raf4_wrap - ! sub raf4_ad_wrap - ! sub raf_sm4_ad_wrap - ! sub raf4_wrap - ! sub raf_sm4_wrap - ! sub adjoint_check4 - ! sub raf4_ad - ! sub raf_sm4_ad - ! sub rad_sm24_ad - ! sub alpha_beta4 - ! sub alpha_betaa4 - ! sub count_strings - ! sub gethex - ! sub indexxi4 - ! sub indexxi8 - ! sub init_raf4 - ! sub normalize2_raf4 - ! sub one_color4 - ! sub one_color24 - ! sub raf4 - ! sub raf_sm4 - ! sub sort_strings4 - ! sub string_assemble4 - ! sub string_label - ! sub my_gatherv8 - ! sub my_scatterv8 - ! sub what_color_is - ! sub add_halox0 - ! sub add_haloy0 - ! sub add_halo_x - ! sub add_halo_y - ! sub one_color4_new_factorization - ! sub one_color24_new_factorization - ! sub new_alpha_beta4 - ! sub new_alpha_betaa4 - ! sub stringop - ! sub quadfil - ! ! Variable Type Definition: ! def filter_cons - structure variable containing information ! for anisotropic filter--created by init_raf4 ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block use kinds,only: r_double,r_quad,r_single,i_byte,i_long,i_llong,i_short use mpimod,only: mpi_comm_world,mpi_integer,mpi_integer1,mpi_integer2,mpi_integer4, & mpi_integer8,mpi_max,mpi_min,mpi_real4,mpi_real8,mpi_real16,mpi_sum use constants, only: zero,half,one,two,zero_quad,zero_single use gsi_io, only: verbose implicit none ! set default to private private ! set subroutines to public public :: set_indices public :: init_raf4_wrap public :: raf4_ad_wrap public :: raf_sm4_ad_wrap public :: raf4_wrap public :: raf_sm4_wrap public :: adjoint_check4 public :: raf4_ad public :: raf_sm4_ad public :: rad_sm24_ad public :: alpha_beta4 public :: alpha_betaa4 public :: count_strings public :: gethex public :: indexxi4 public :: indexxi8 public :: init_raf4 public :: normalize2_raf4 public :: one_color4 public :: one_color24 public :: raf4 public :: raf_sm4 public :: sort_strings4 public :: string_assemble4 public :: string_label public :: my_gatherv8 public :: my_scatterv8 public :: what_color_is public :: add_halox0 public :: add_haloy0 public :: add_halo_x public :: add_halo_y public :: one_color4_new_factorization public :: one_color24_new_factorization public :: new_alpha_beta4 public :: new_alpha_betaa4 public :: stringop public :: quadfil ! set passed variables to public public :: filter_cons,filter_indices ! declare type structure for recursive anisotropic filter constants type filter_cons sequence logical new_factorization integer(i_long) ngauss integer(i_long) npass integer(i_long) ifilt_ord integer(i_long) npointsmaxall integer(i_long) npointsmax integer(i_long) npoints_send integer(i_long) npoints_recv integer(i_long) nstrings integer(i_long) nsmooth integer(i_long) nsmooth_shapiro integer(i_long) nrows integer(i_long) nsend_halox_loc integer(i_long) nrecv_halox_loc integer(i_long) nsend_haloy_loc integer(i_long) nrecv_haloy_loc integer(i_long),pointer:: nrecv_halox(:) => NULL() integer(i_long),pointer:: ndrecv_halox(:) => NULL() integer(i_long),pointer:: nsend_halox(:) => NULL() integer(i_long),pointer:: ndsend_halox(:) => NULL() integer(i_long),pointer:: info_send_halox(:,:) => NULL() integer(i_long),pointer:: info_recv_halox(:,:) => NULL() integer(i_long),pointer:: nrecv_haloy(:) => NULL() integer(i_long),pointer:: ndrecv_haloy(:) => NULL() integer(i_long),pointer:: nsend_haloy(:) => NULL() integer(i_long),pointer:: ndsend_haloy(:) => NULL() integer(i_long),pointer:: info_send_haloy(:,:) => NULL() integer(i_long),pointer:: info_recv_haloy(:,:) => NULL() integer(i_long),pointer::istart(:) => NULL() integer(i_long),pointer::ib(:) => NULL() integer(i_long),pointer::nrecv(:) => NULL() integer(i_long),pointer::ndrecv(:) => NULL() integer(i_long),pointer::nsend(:) => NULL() integer(i_long),pointer::ndsend(:) => NULL() real(r_single),pointer:: gnorm_halox(:) => NULL() real(r_single),pointer:: gnorm_haloy(:) => NULL() real(r_single),pointer::lnf(:,:,:,:) => NULL() real(r_single),pointer::bnf(:,:,:) => NULL() real(r_single),pointer::fmat(:,:,:,:,:) => NULL() real(r_single),pointer::fmat0(:,:,:,:) => NULL() real(r_single),pointer::amp(:,:,:,:) => NULL() integer(i_short),pointer::ia(:) => NULL() integer(i_short),pointer::ja(:) => NULL() integer(i_short),pointer::ka(:) => NULL() end type filter_cons !--------------------------------------------------------------- ! Type valiable to simplify the subroutine interfaces !--------------------------------------------------------------- type filter_indices integer(i_long)::ids, ide, jds, jde, kds, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe ! patch indices end type filter_indices contains subroutine set_indices(indices, & ids, ide, jds, jde, kds, kde, & ips, ipe, jps, jpe, kps, kpe ) !$$$ subprogram documentation block ! . . . ! subprogram: set_indices ! ! prgmmr: sato ! ! abstract: initialize type_indices ! ! program history log: ! 2008-11-03 sato ! ! input argument list: ! ids, ide, jds, jde, kds, kde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! ! output argument list: ! indices - type variable which contains all indices ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_indices),intent( out) :: indices integer(i_long) ,intent(in ) :: ids, ide, jds, jde, kds, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe ! patch indices indices%ids=ids; indices%ide=ide indices%jds=jds; indices%jde=jde indices%kds=kds; indices%kde=kde indices%ips=ips; indices%ipe=ipe indices%jps=jps; indices%jpe=jpe indices%kps=kps; indices%kpe=kpe end subroutine set_indices subroutine init_raf4_wrap(aspect,triad4,ngauss,rgauss,npass,normal,binom,ifilt_ord,filter, & nsmooth,nsmooth_shapiro, & nvars,idvar,kvar_start,kvar_end,var_names,idx,mype,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: init_raf4_wrap ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-25 lueken - added subprogram doc block ! ! input argument list: ! triad4,binom ! ngauss,normal,npass,ifilt_ord,nvars ! rguass ! idvar ! kvar_start ! kvar_end ! var_names ! mype,npes ! nsmooth ! nsmooth_shapiro ! filter ! idx ! aspect ! ! output argument list: ! filter ! idx ! nsmooth ! aspect ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_cons), intent(inout) :: filter(7) type(filter_indices),intent(inout) :: idx real(r_single), intent(inout) :: aspect(7, & idx%ips:idx%ipe, & idx%jps:idx%jpe, & idx%kps:idx%kpe) logical, intent(in ) :: triad4,binom integer(i_long), intent(in ) :: ngauss,normal,npass,ifilt_ord,nvars real(r_double), intent(in ) :: rgauss(ngauss) integer(i_long), intent(in ) :: idvar(idx%kds:idx%kde) integer(i_long), intent(in ) :: kvar_start(nvars) integer(i_long), intent(in ) :: kvar_end(nvars) character(len=80), intent(in ) :: var_names(nvars) integer(i_long), intent(in ) :: mype, npes integer(i_long), intent(inout) :: nsmooth integer(i_long), intent(in ) :: nsmooth_shapiro ! number of 2nd moment preserving shapiro smoothers call init_raf4(aspect,triad4,ngauss,rgauss,npass,normal,binom,ifilt_ord,filter, & nsmooth,nsmooth_shapiro, & nvars,idvar,kvar_start,kvar_end,var_names, & idx%ids, idx%ide, idx%jds, idx%jde, idx%kds, idx%kde, & ! domain idx idx%ips, idx%ipe, idx%jps, idx%jpe, idx%kps, idx%kpe, & ! patch idx mype, npes) end subroutine init_raf4_wrap subroutine raf4_ad_wrap(g,filter,ngauss,idx,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf4_ad_wrap ! ! prgrmmr: sato ! ! abstract: interface for raf4_ad with type_indices ! ! program history log: ! 2008-11-03 sato ! ! input argument list: ! filter ! idx ! ngauss ! npes ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_cons) ,intent(in ) :: filter(7) type(filter_indices),intent(in ) :: idx integer(i_long) ,intent(in ) :: ngauss, npes real(r_single) ,intent( out) :: g( ngauss, & idx%ips:idx%ipe, & idx%jps:idx%jpe, & idx%kps:idx%kpe ) call raf4_ad(g,filter,ngauss, & idx%ids, idx%ide, idx%jds, idx%jde, & ! domain idx idx%ips, idx%ipe, idx%jps, idx%jpe, idx%kps, idx%kpe, & ! patch idx npes) end subroutine raf4_ad_wrap subroutine raf_sm4_ad_wrap(g,filter,ngauss,idx,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf4_ad_wrap ! ! prgrmmr: sato ! ! abstract: interface for raf4_ad_sm4 with type_indices ! ! program history log: ! 2008-11-03 sato ! ! input argument list: ! filter ! idx ! ngauss ! npes ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_cons) ,intent(in ) :: filter(7) type(filter_indices),intent(in ) :: idx integer(i_long) ,intent(in ) :: ngauss, npes real(r_single) ,intent( out) :: g( ngauss, & idx%ips:idx%ipe, & idx%jps:idx%jpe, & idx%kps:idx%kpe ) call raf_sm4_ad(g,filter,ngauss, & idx%ips, idx%ipe, idx%jps, idx%jpe, idx%kps, idx%kpe, & ! patch idx npes) end subroutine raf_sm4_ad_wrap subroutine raf4_wrap(g,filter,ngauss,idx,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf4_wrap ! ! prgrmmr: sato ! ! abstract: interface for raf4 with type_indices ! ! program history log: ! 2008-11-03 sato ! ! input argument list: ! filter ! idx ! ngauss ! npes ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_cons) ,intent(in ) :: filter(7) type(filter_indices),intent(in ) :: idx integer(i_long) ,intent(in ) :: ngauss, npes real(r_single) ,intent( out) :: g( ngauss, & idx%ips:idx%ipe, & idx%jps:idx%jpe, & idx%kps:idx%kpe ) call raf4(g,filter,ngauss, & idx%ids, idx%ide, idx%jds, idx%jde, & ! domain idx idx%ips, idx%ipe, idx%jps, idx%jpe, idx%kps, idx%kpe, & ! patch idx npes) end subroutine raf4_wrap subroutine raf_sm4_wrap(g,filter,ngauss,idx,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf_sm4_wrap ! ! prgrmmr: sato ! ! abstract: interface for raf_sm4 with type_indices ! ! program history log: ! 2008-11-03 sato ! ! input argument list: ! filter ! idx ! ngauss ! npes ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none type(filter_cons) ,intent(in ) :: filter(7) type(filter_indices),intent(in ) :: idx integer(i_long) ,intent(in ) :: ngauss, npes real(r_single) ,intent( out) :: g( ngauss, & idx%ips:idx%ipe, & idx%jps:idx%jpe, & idx%kps:idx%kpe ) call raf_sm4(g,filter,ngauss, & idx%ips, idx%ipe, idx%jps, idx%jpe, idx%kps, idx%kpe, & ! patch idx npes) end subroutine raf_sm4_wrap subroutine adjoint_check4(filter,ngauss,ips,ipe,jps,jpe,kps,kpe,mype,npes) !$$$ subprogram documentation block ! . . . ! subprogram: adjoint_check4 ! ! prgrmmr: ! ! abstract: test that filter is symmetric ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! ips, ipe, jps, jpe, kps, kpe - patch indices ! mype - mpi task id ! npes - total num of mpi tasks ! ngauss - num of Gaussian ! filter ! ! output argument list: ! filter ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ips,ipe,jps,jpe,kps,kpe INTEGER(i_long) ,INTENT(IN ) :: mype,npes,ngauss TYPE(filter_cons),intent(inout) :: filter(7) ! structure defining recursive filter real(r_single) xvec( ngauss,ips:ipe, jps:jpe, kps:kpe ) real(r_single) yvec( ngauss,ips:ipe, jps:jpe, kps:kpe ) real(r_single) zvec( ngauss,ips:ipe, jps:jpe, kps:kpe ) integer(i_long) i,igauss,j,k real(r_quad) yty,xtz,one_quad real(r_quad) yty0(npes),xtz0(npes) integer(i_long) ierror one_quad=zero_quad xvec=zero_single yvec=zero_single zvec=zero_single call random_number(xvec) yvec=xvec call raf_sm4(yvec,filter,ngauss,ips,ipe,jps,jpe,kps,kpe,npes) zvec=yvec call raf_sm4_ad(zvec,filter,ngauss,ips,ipe,jps,jpe,kps,kpe,npes) yty=zero_quad xtz=zero_quad do k=kps,kpe do j=jps,jpe do i=ips,ipe do igauss=1,ngauss yty=yty+one_quad*yvec(igauss,i,j,k)**2 xtz=xtz+one_quad*xvec(igauss,i,j,k)*zvec(igauss,i,j,k) end do end do end do end do if(r_double==r_quad) then call mpi_gather(yty,1,mpi_real8 ,yty0,1,mpi_real8 ,0,mpi_comm_world,ierror) call mpi_gather(xtz,1,mpi_real8 ,xtz0,1,mpi_real8 ,0,mpi_comm_world,ierror) else call mpi_gather(yty,1,mpi_real16,yty0,1,mpi_real16,0,mpi_comm_world,ierror) call mpi_gather(xtz,1,mpi_real16,xtz0,1,mpi_real16,0,mpi_comm_world,ierror) endif if(mype==0) then yty=zero_quad xtz=zero_quad do i=1,npes yty=yty+yty0(i) xtz=xtz+xtz0(i) end do write(6,*)' adjoint check for raf,ad_raf, yty,xtz=',yty,xtz end if end subroutine adjoint_check4 SUBROUTINE raf4_ad(g,filter,ngauss,ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf4_ad ! ! prgrmmr: ! ! abstract: 2nd half of recursive anisotropic self-adjoint filter (full-strings version) ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! ids, ide, jds, jde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! ngauss - num of Gaussian ! npes - total num of mpi tasks ! g ! filter ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ids, ide, jds, jde, & ips, ipe, jps, jpe, kps, kpe INTEGER(i_long) ,INTENT(IN ) :: ngauss,npes real(r_single), DIMENSION( ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field to be filtered, output--filtered field TYPE(filter_cons) ,intent(in ) :: filter(7) ! structure defining recursive filter integer(i_long) i,icolor,igauss,ipass,j,k,iadvance,iback if(filter(1)%npass>0) then do ipass=filter(1)%npass,1,-1 do icolor=1,7 if(filter(icolor)%npointsmaxall>0) then if(filter(1)%new_factorization) then iadvance=2_i_long iback=1 call one_color4_new_factorization(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart, & ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) else call one_color4(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart,ips,ipe,jps,jpe,kps,kpe,npes) end if end if end do end do end if ! apply smoothing if nsmooth or nsmooth_shapiro > 0 if(kps<=kpe) then do i=1,max(filter(1)%nsmooth,filter(1)%nsmooth_shapiro) call smther(filter(1),g,filter(1)%nrows,ngauss,filter(1)%nsmooth,filter(1)%nsmooth_shapiro, & ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe, & npes,filter(1)%gnorm_halox,filter(1)%gnorm_haloy,.true.) end do end if ! finally multiply by amp do k=kps,kpe do j=jps,jpe do i=ips,ipe do igauss=1,ngauss g(igauss,i,j,k)=filter(1)%amp(igauss,i,j,k)*g(igauss,i,j,k) end do end do end do end do end subroutine raf4_ad SUBROUTINE raf_sm4_ad(g,filter,ngauss,ips,ipe,jps,jpe,kps,kpe,npes) ! !$$$ subprogram documentation block ! . . . ! subprogram: raf_sm4_ad ! ! prgrmmr: ! ! abstract: ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ngauss - ! g ! filter ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: & ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: npes,ngauss real(r_single), DIMENSION( ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field to be filtered, output--filtered field TYPE(filter_cons) ,intent(in ) :: filter(7) ! structure defining recursive filter integer(i_long) icolor,ipass,iadvance,iback if(filter(1)%npass>0) then do ipass=filter(1)%npass,1,-1 do icolor=1,7 if(filter(icolor)%npointsmaxall>0) then if(filter(1)%new_factorization) then iadvance=2_i_long iback=1 call one_color4_new_factorization(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart, & ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) else call one_color4(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart,ips,ipe,jps,jpe,kps,kpe,npes) end if end if end do end do end if end subroutine raf_sm4_ad SUBROUTINE rad_sm24_ad(g,filter,ngauss,ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: rad_sm24_ad ! ! prgrmmr: ! ! abstract: do two fields at same time, each real(4) ! 2nd half of recursive anisotropic self-adjoint filter (full-strings version) ! (no amplitude factor--used only for local weighted averages) ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! ids, ide, jds, jde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ngauss - ! g ! filter ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ implicit none INTEGER(i_long) ,INTENT(IN ) :: & ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe INTEGER(i_long) ,INTENT(IN ) :: npes,ngauss real(r_single), DIMENSION(2,ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field to be filtered, output--filtered field TYPE(filter_cons) ,intent(in ) :: filter(7) ! structure defining recursive filter integer(i_long) icolor,ipass,i,ii,j,k,kk,n,iadvance,iback real(r_single) gwork(ngauss,ips:ipe,jps:jpe,kps:kpe) if(filter(1)%npass>0) then do ipass=filter(1)%npass,1,-1 do icolor=1,7 if(filter(icolor)%npointsmaxall>0) then if(filter(1)%new_factorization) then iadvance=2_i_long iback=1 call one_color24_new_factorization(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart, & ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) else call one_color24(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart,ips,ipe,jps,jpe,kps,kpe,npes) end if end if end do end do end if ! apply smoothing if nsmooth or nsmooth_shapiro > 0 if(max(filter(1)%nsmooth,filter(1)%nsmooth_shapiro) > 0.and.kps<=kpe) then do kk=1,2 do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss gwork(n,i,j,k)=g(kk,n,i,j,k) end do end do end do end do do ii=1,max(filter(1)%nsmooth,filter(1)%nsmooth_shapiro) call smther(filter(1),gwork,filter(1)%nrows,ngauss,filter(1)%nsmooth,filter(1)%nsmooth_shapiro, & ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe, & npes,filter(1)%gnorm_halox,filter(1)%gnorm_haloy,.true.) end do do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(kk,n,i,j,k)=gwork(n,i,j,k) end do end do end do end do end do end if end subroutine rad_sm24_ad subroutine alpha_beta4(info_string,aspect_full,rgauss,lnf,bnf,igauss,ngauss, & istart_out,npoints_mype,binomial,npass,ifilt_ord, & lenbar,lenmax,lenmin,npoints1,nvars) !$$$ subprogram documentation block ! . . . ! subprogram: alpha_beta4 ! ! prgrmmr: ! ! abstract: compute recursion constants alpha and beta along unbroken strings ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! npoints_mype,npass,ifilt_ord ! binomial ! info_string ! aspect_full ! igauss,ngauss ! rgauss ! lnf,bnf ! nvars ! ! output argument list: ! istart_out ! lenmax,lenmin,npoints1 ! lenbar ! lnf,bnf ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block IMPLICIT NONE INTEGER(i_long) ,INTENT(IN ) :: nvars INTEGER(i_long) ,INTENT(IN ) :: npoints_mype,npass,ifilt_ord REAL(r_double) , DIMENSION( 10, 10 ) ,INTENT(IN ) :: & binomial INTEGER(i_short), DIMENSION( 8, npoints_mype ),INTENT(IN ) :: & info_string ! 1---- distance from origin to current point ! 2,3,4-- origin coordinates ! 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string real(r_single) , DIMENSION( npoints_mype ) ,INTENT(IN ) :: & aspect_full integer(i_long) ,intent(in ) :: igauss,ngauss real(r_double) ,intent(in ) :: rgauss real(r_single) ,intent(inout) :: & lnf(ifilt_ord,npoints_mype,npass,ngauss),bnf(npoints_mype,npass,ngauss) integer(i_long) ,intent( out) :: istart_out(*) integer(i_long) ,intent( out) :: lenmax(nvars),lenmin(nvars),& npoints1(nvars) ! diagnostic output--to look at string real(r_double) ,intent( out) :: lenbar(nvars) integer(i_long) i,iend,ipass,istart,ivar integer(i_long) nstrings nstrings=0 istart=1 if(npoints_mype>1) then do i=2,npoints_mype if(info_string(1,i)/=info_string(1,i-1)+1.or. & info_string(2,i)/=info_string(2,i-1).or. & info_string(3,i)/=info_string(3,i-1).or. & info_string(4,i)/=info_string(4,i-1).or. & info_string(5,i)/=info_string(5,i-1).or. & info_string(6,i)/=info_string(6,i-1).or. & info_string(7,i)/=info_string(7,i-1).or. & info_string(8,i)/=info_string(8,i-1)) then iend=i-1 if(igauss==1) then ivar=info_string(8,iend) lenbar(ivar)=lenbar(ivar)+(iend-istart+1) lenmax(ivar)=max(iend-istart+1,lenmax(ivar)) lenmin(ivar)=min(iend-istart+1,lenmin(ivar)) if(iend==istart) npoints1(ivar)=npoints1(ivar)+1 end if nstrings=nstrings+1 istart_out(nstrings)=istart istart_out(nstrings+1)=iend+1 do ipass=1,npass call alpha_betaa4(aspect_full(istart),rgauss,iend-istart+1,binomial(ipass,npass), & lnf(1,istart,ipass,igauss),bnf(istart,ipass,igauss),ifilt_ord) end do istart=iend+1 end if end do end if iend=npoints_mype if(igauss==1) then ivar=info_string(8,iend) lenbar(ivar)=lenbar(ivar)+(iend-istart+1) lenmax(ivar)=max(iend-istart+1,lenmax(ivar)) lenmin(ivar)=min(iend-istart+1,lenmin(ivar)) if(iend==istart) npoints1(ivar)=npoints1(ivar)+1 end if nstrings=nstrings+1 istart_out(nstrings)=istart istart_out(nstrings+1)=iend+1 do ipass=1,npass call alpha_betaa4(aspect_full(istart),rgauss,iend-istart+1,binomial(ipass,npass), & lnf(1,istart,ipass,igauss),bnf(istart,ipass,igauss),ifilt_ord) end do end subroutine alpha_beta4 subroutine alpha_betaa4(aspect,rgauss,ng,binomial,lnf,bnf,m) !$$$ subprogram documentation block ! . . . ! subprogram: alpha_betaa4 ! ! prgrmmr: ! ! abstract: compute various constants for Purser 1-d high-order filter ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused vars ! ! input argument list: ! aspect - correlation scale (squared, i think), grid units ! rgauss - multiplier of aspect, for multiple gaussians--used for fat-tail filter ! ng - length of string ! binomial - weighting factors (perhaps not needed with high-order filter) ! m - filter order ! ! output argument list: ! lnf,bnf - filter parameters ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block IMPLICIT NONE INTEGER(i_long) ,INTENT(IN ) :: ng,m REAL(r_double) ,INTENT(IN ) :: binomial real(r_single), DIMENSION( ng ),INTENT(IN ) :: aspect real(r_double) ,intent(in ) :: rgauss real(r_single) ,intent( out) :: lnf(m,ng),bnf(ng) real(r_double) sig(ng),snu(ng) real(r_double) lnf8(m,ng),bnf8(ng) integer(i_long) i,j do i=1,ng sig(i)=sqrt(rgauss*aspect(i)*binomial) snu(i)=one end do call coefrf(sig,snu,ng,m,bnf8,lnf8) do i=1,ng bnf(i)=bnf8(i) do j=1,m lnf(j,i)=lnf8(j,i) end do end do end subroutine alpha_betaa4 subroutine count_strings(info_string,nstrings,nstrings_var,nvars,npoints_mype) !$$$ subprogram documentation block ! . . . ! subprogram: count_strings ! ! prgrmmr: ! ! abstract: compute recursion constants alpha and beta along unbroken strings ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! info_string 1 - distance from origin to current point ! 2,3,4 - origin coordinates ! 5,6,7,8 - jumpx,jumpy,jumpz,ivar for this string ! nvars - ! npoints_mype - ! ! output argument list: ! nstrings,nstrings_var ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block IMPLICIT NONE INTEGER(i_long) ,INTENT(IN ) :: npoints_mype,nvars INTEGER(i_short), DIMENSION( 8, npoints_mype ),INTENT(IN ) :: & info_string ! 1---- distance from origin to current point ! 2,3,4-- origin coordinates ! 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string integer(i_long) ,intent( out) :: nstrings,nstrings_var(nvars) integer(i_long) i,iend,istart,ivar nstrings=0 istart=1 if(npoints_mype>1) then do i=2,npoints_mype if(info_string(1,i)/=info_string(1,i-1)+1.or. & info_string(2,i)/=info_string(2,i-1).or. & info_string(3,i)/=info_string(3,i-1).or. & info_string(4,i)/=info_string(4,i-1).or. & info_string(5,i)/=info_string(5,i-1).or. & info_string(6,i)/=info_string(6,i-1).or. & info_string(7,i)/=info_string(7,i-1).or. & info_string(8,i)/=info_string(8,i-1)) then iend=i-1 nstrings=nstrings+1 ivar=info_string(8,iend) nstrings_var(ivar)=nstrings_var(ivar)+1 istart=iend+1 end if end do iend=npoints_mype nstrings=nstrings+1 ivar=info_string(8,iend) nstrings_var(ivar)=nstrings_var(ivar)+1 end if return end subroutine count_strings SUBROUTINE GETHEX(UTARGET,LGUESS,LHEXAD,LUI,WHEXAD,KT) !$$$ subprogram documentation block ! . . . ! subprogram: GETHEX ! ! prgrmmr: Purser 1997 ! ! abstract: ! Apply implicit lattice transformations until the target tensor UTARGET ! can be represented as a positive combination of the components of a ! "canonical hexad", with coefficients which may each be interpreted as the ! grid-unit "spread" component in the associated generalized grid direction. ! The target tensor is given in basic grid units. If "LBASIS" is ! a triplet of integer 3-vectors, collectively of determinant = +4, and ! with the all Nth component of these vectors either odd or even, the convex ! hull of the six vectors, {LBASIS and -LBASIS}, forms an octahedron. The ! midpoints of the 12 edges form the 2 diametricaly opposite pairs of sets, ! each consisting of 6 distinct integer vectors, LHEXAD, which are the ! hexad of generalized grid steps along which the application of appropriately ! weighted smoothers will result in the target spread. The requisite weights, ! WHEXAD, are the spreads in the individual hexad directions when expressed ! in the natural grid-units of each of these 6 directions. These weights are ! such that the matrix LU multiplied by WHEXAD gives UTARGET, where the Jth ! column of LU is 6-vector representation of the degenerate tensor describing ! a spread in the direction of hexad-J of one unit of the grid spacing in this ! direction. matrix LUI is the transpose of the inverse of LU. ! ! HOW IT WORKS: ! For the given target tensor, the hexad and weights are defined iteratively. ! A valid "guess" for LHEXAD, LU, LUI, must first be provided. If these are ! not provided by the user, just set the flag LGUESS to 0, and the routine ! will provide feasible default values; otherwise set LGUESS > 0. ! First let us suppose the given hexad and accompanying LU, LUI are valid. ! Then we may compute the corresponding weights: ! WHEXAD = (LUI-transpose)*UTARGET ! If the hexad really IS valid, these weights will all be non-negative and ! the task is done. But if some weight is negative, then the hexad is invalid ! and an alternative valid hexad must be sought. In this situation, the ! algorithm first determines the MOST negative weight and its associated ! grid direction and, keeping the other five directions the same, replaces ! the offending one with the unique (up to sign change) alternative such ! that the NEW hexad are also the midpoints of some grid-octahedron formed from ! an integer-vector basis of determinant = 4 and of the required form. ! To preserve algorithmic symmetry, the enumeration of the new grid directions ! undergoes a permutation. Since only one column of LU changes, the work needed ! to update LUI is less that the work needed to compute the new inverse from ! scratch (cf the "simplex method" of linear programming). We can also exploit ! the special property of this problem that the determinant of LU changes from ! +1 to -1 when the offending column is replaced by its alternative. The ! change in the enumeration of the vectors LHEXAD and of the columns of LU and ! LUI can be regarded as a way to preserve the pattern of implied geometrical ! relationships among these vectors, so that the algorithm is relatively simple. ! By repeating this step, the algorithm eventually homes in on the ! uniquely valid hexad. ! ! The "cuboctahedron" associated with the default guess basis is shown in 3 ! orthogonal views below, hexad vector shown thus, (); basis vectors, []. ! ! ! (6)--------(3) ! | | \ (In each view, nearest facet is speckled) ! | [1] | \[3] ! | | \ <===== top view ! | | \ ! (-4)-------(5)--------(1) south view ! \::::::::| | // ! \::::::| | // ! [-3]\::::| [2] | // east view ! \::| | // || ! (2)--------(-6) // || ! // \/ ! (-4)------ (2) // (2)------- (5) ! | |::\ // | | \ ! | |::::\[2] // | | \ ! | |::::::\ <=====// | | \ ! | |::::::::\ | | \ ! (-1)------(-3)--------(-6) (-6)--------(1)--------(3) ! \ | | \::::::::| | ! \ | | \::::::| | ! \ | | \::::| | ! \ | | \::| | ! (-5)-------(4) (4)-------(-2) ! ! ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! UTARGET - 6-vector comprising components of the target aspect tensor ! LGUESS - Code to tell whether input LHEXAD are feasible (LGUESS.NE.0) ! LHEXAD - 6 integer basis vectors giving the canonical grid steps ! LUI - 6 6-vectors dual to those of LU: [LUI]^t*[LU]=[I] ! ! output argument list: ! LHEXAD - 6 integer basis vectors giving the canonical grid steps ! LUI - 6 6-vectors dual to those of LU: [LUI]^t*[LU]=[I] ! WHEXAD - 6 real "spread" components in generalized grid-step units ! KT - The number of iterations it required to find the valid hexad ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none real(r_double) ,intent(in ) :: utarget(6) integer(i_long),intent(in ) :: lguess real(r_double) ,intent( out) :: whexad(6) integer(i_long),intent(inout) :: lhexad(3,6),lui(6,6) integer(i_long),intent( out) :: kt integer(i_long) ihexad(3,6),ilui(6,6) ! defaults integer(i_long) newlhex(3,2:6),newlui(6,2:6),lui1(6) real(r_double) wt(6) integer(i_long) kp(6,6),ksg(6,6) real(r_double) bcmin,bcmins,u,w1 integer(i_long) i,it,j,k,k1or3,kfirst,ksign,l DATA KP /1,2,6,3,4,5, 2,1,4,6,5,3 & ! Permutation code. ,3,4,2,5,6,1, 4,3,6,2,1,5 & ! This line is previous + 2 (modulo 6) ,5,6,4,1,2,3, 6,5,2,4,3,1/ ! This line is previous + 2 (modulo 6) DATA KSG/1, 1, 1, 1, -1,-1, 1, 1, -1, 1, 1,-1 & ,1, 1, 1, 1, -1,-1, 1, 1, -1, 1, 1,-1 & ,1, 1, 1, 1, -1,-1, 1, 1, -1, 1, 1,-1/ DATA IHEXAD/ 1, 0, 0, 0,-1, 1 & , 0, 1, 0, 1, 0,-1 & , 0, 0, 1, -1, 1, 0/ DATA ILUI/1, 0, 0, 0, 1, 1, 0, 0, 0, -1, 0, 0 & ,0, 1, 0, 1, 0, 1, 0, 0, 0, 0,-1, 0 & ,0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,-1/ bcmins=-epsilon(utarget) ! a criterion slightly < 0 avoids roundoff worries IF(LGUESS==0)THEN DO J=1,6 DO I=1,3 LHEXAD(I,J)=IHEXAD(I,J) ENDDO DO I=1,6 LUI(I,J)=ILUI(I,J) ENDDO ENDDO ENDIF ! Use initial estimate of hexad to compute implied weights directly. ! (Subsequent updates of these weights are done perturbatively to save time). DO I=1,6 WHEXAD(I)=zero ENDDO DO I=1,6 U=UTARGET(I) DO J=1,6 WHEXAD(J)=WHEXAD(J)+LUI(I,J)*U ENDDO ENDDO K1OR3=1 ! At iteration 1, WHEXAD(1) and (2) might be < 0. DO IT=1,100! 4000 ! this should be ample KT=IT ! report back how many iterations were needed L=0 BCMIN=BCMINS DO K=K1OR3,6 IF(WHEXAD(K) 0. ENDDO write(6,*)'GETHEX: ALL ITERATIONS USED UP. This should never happen' call stop2(68) END subroutine gethex subroutine indexxi4(n,arrin4,indx) !$$$ subprogram documentation block ! . . . . ! subprogram: indexxi4 ! prgrmmr: ! ! abstract: ! ! program history log: ! 2009-08-26 lueken - added subprogram doc block ! ! input argument list: ! n ! arrin4 ! indx ! ! output argument list: ! indx ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block !-------- indexes an array arrin of length n, i.e. outputs the array indx !-------- such that arrin(indx(j)) is in ascending order for j=1,2,...,n. The !-------- input quantities n and arrin are not changed. implicit none integer(i_long),intent(in ) :: n integer(i_long),intent(in ) :: arrin4(n) integer(i_long),intent(inout) :: indx(n) integer(i_long) i,indxt,ir,j,l,q4 do j=1,n indx(j)=j end do if(n==1) return l=n/2+1 ir=n do if(l>1) then l=l-1 indxt=indx(l) q4=arrin4(indxt) else indxt=indx(ir) q4=arrin4(indxt) indx(ir)=indx(1) ir=ir-1 if(ir==1) then indx(1)=indxt return end if end if i=l j=l+l do while (j<=ir) if(j1) then l=l-1 indxt=indx(l) q8=arrin8(indxt) else indxt=indx(ir) q8=arrin8(indxt) indx(ir)=indx(1) ir=ir-1 if(ir==1) then indx(1)=indxt return end if end if i=l j=l+l do while (j<=ir) if(j0) nsmooth=0 ! set up halo update (only if nsmooth and/or nsmooth_shapiro > 0) filter(1)%nrows=0 if(nsmooth>0) filter(1)%nrows=1 if(nsmooth_shapiro>0) filter(1)%nrows=3_i_long filter(1)%nsmooth=nsmooth filter(1)%nsmooth_shapiro=nsmooth_shapiro call add_halox0(filter(1),filter(1)%nrows,ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes) call add_haloy0(filter(1),filter(1)%nrows,ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes) deallocate(filter(1)%gnorm_halox,stat=istat) allocate(filter(1)%gnorm_halox(ips:ipe)) filter(1)%gnorm_halox=one deallocate(filter(1)%gnorm_haloy,stat=istat) allocate(filter(1)%gnorm_haloy(jps:jpe)) filter(1)%gnorm_haloy=one if(nsmooth_shapiro>0) then call smther_two_gnorm(filter(1)%gnorm_halox,ids,ide,ips,ipe) call smther_two_gnorm(filter(1)%gnorm_haloy,jds,jde,jps,jpe) end if filter(1)%ngauss=ngauss filter(1)%npass=npass if(ifilt_ord==-4_i_long) then filter(1)%ifilt_ord=2_i_long filter(1)%new_factorization=.true. else filter(1)%ifilt_ord=ifilt_ord filter(1)%new_factorization=.false. end if ! compute binomial coefficients factor_binom=one if(.not.binom) factor_binom=zero binomial0=zero binomial0(1,1)=one binomial0(2,1)=one sumbin(1)=two do k=2,19 binomial0(1,k)=one binomial0(k+1,k)=one do i=2,k binomial0(i,k)=binomial0(i-1,k-1)+binomial0(i,k-1)*factor_binom end do sumbin(k)=zero do i=1,k+1 sumbin(k)=sumbin(k)+binomial0(i,k) end do end do do k=1,19 binomial0(:,k)=binomial0(:,k)/sumbin(k) end do kk=0 binomial=zero do k=1,19,2 kk=kk+1 binomial(1:kk,kk)=binomial0(1:kk,k) end do if(ifilt_ord==-4_i_long) binomial=two*binomial if(mype==0 .and. print_verbose) write(6,*)'INIT_RAF4: binomial weightings used:',binomial(1:npass,npass) ! gather some stats on input aspect tensor for informational purposes aspect_max=0 aspect_min=huge(aspect_min) do k=kps,kpe ivar=idvar(k) do j=jps,jpe do i=ips,ipe ! if(aspect(1,i,j,k)<.001_r_kind) write(6,*)' ivar,i,j,k,aspect(1,i,j,k)=',ivar,i,j,k,aspect(1,i,j,k) aspect_max(ivar,1)=max(aspect_max(ivar,1),sqrt(aspect(1,i,j,k))) aspect_min(ivar,1)=min(aspect_min(ivar,1),sqrt(aspect(1,i,j,k))) aspect_max(ivar,2)=max(aspect_max(ivar,2),sqrt(aspect(2,i,j,k))) aspect_min(ivar,2)=min(aspect_min(ivar,2),sqrt(aspect(2,i,j,k))) aspect_max(ivar,3)=max(aspect_max(ivar,3),sqrt(aspect(3,i,j,k))) aspect_min(ivar,3)=min(aspect_min(ivar,3),sqrt(aspect(3,i,j,k))) end do end do end do call mpi_reduce(aspect_max,aspect_max_all,3*nvars,mpi_real4,mpi_max,0,mpi_comm_world,ierr) call mpi_reduce(aspect_min,aspect_min_all,3*nvars,mpi_real4,mpi_min,0,mpi_comm_world,ierr) if(mype==0 .and. print_verbose) then write(6,*)' corlen multipliers for additive gaussians: ' do igauss=1,ngauss write(6,*)' sqrt(rgauss(',igauss,') = ',sqrt(rgauss(igauss)) end do srgauss_min=sqrt(minval(rgauss)) srgauss_max=sqrt(maxval(rgauss)) do ivar=1,nvars if(kvar_start(ivar)==kvar_end(ivar)) then write(6,*)' corlen ranges for 2d variable ',trim(var_names(ivar)) write(6,*)' min-min, min, max, max-max (1) = ', & srgauss_min*aspect_min_all(ivar,1), & aspect_min_all(ivar,1), & aspect_max_all(ivar,1), & srgauss_max*aspect_max_all(ivar,1) write(6,*)' min-min, min, max, max-max (2) = ', & srgauss_min*aspect_min_all(ivar,2), & aspect_min_all(ivar,2), & aspect_max_all(ivar,2), & srgauss_max*aspect_max_all(ivar,2) else write(6,*)' corlen ranges for 3d variable ',trim(var_names(ivar)) write(6,*)' min-min, min, max, max-max (1) = ', & srgauss_min*aspect_min_all(ivar,1), & aspect_min_all(ivar,1), & aspect_max_all(ivar,1), & srgauss_max*aspect_max_all(ivar,1) write(6,*)' min-min, min, max, max-max (2) = ', & srgauss_min*aspect_min_all(ivar,2), & aspect_min_all(ivar,2), & aspect_max_all(ivar,2), & srgauss_max*aspect_max_all(ivar,2) write(6,*)' min-min, min, max, max-max (3) = ', & srgauss_min*aspect_min_all(ivar,3), & aspect_min_all(ivar,3), & aspect_max_all(ivar,3), & srgauss_max*aspect_max_all(ivar,3) end if end do end if ! get all directions and smoothing coefficients lhexadx=0 ; lhexady=0 ; lhexadz=0 epstest=10._r_double*epsilon(epstest) do k=kps,kpe ivar=idvar(k) ivar_start=kvar_start(ivar) ivar_end=kvar_end(ivar) ixstart=ipe ; ixend=ips ; ixinc=-1 lguess=0 do j=jps,jpe ixtemp=ixstart ; ixstart=ixend ; ixend=ixtemp ; ixinc=-ixinc do i=ixstart,ixend,ixinc if(triad4.and.ivar_start==ivar_end) then aspect8(1)=aspect(1,i,j,k) aspect8(2)=aspect(2,i,j,k) aspect8(3)=aspect(6,i,j,k) call gettri4(aspect8,lguess,ltriadlast,lui_triad,whexad8) lhexadlast=0 do kk=1,4 lhexadlast(1,kk)=ltriadlast(1,kk) lhexadlast(2,kk)=ltriadlast(2,kk) lhexadlast(3,kk)=0 end do whexad8(5)=zero whexad8(6)=zero else aspect8(1:6)=aspect(1:6,i,j,k) call gethex(aspect8,lguess,lhexadlast,lui,whexad8,kt) end if aspect(1:7,i,j,k)=zero do kk=1,6 if(whexad8(kk)>epstest) then jumpx=lhexadlast(1,kk) ! make all directions positive and jumpy=lhexadlast(2,kk) ! assign color jumpz=lhexadlast(3,kk) if(jumpz/=0.and.ivar_start==ivar_end) exit ! if 2-d, strings of interest are x-y only if(jumpz<0) then jumpx=-jumpx ; jumpy=-jumpy ; jumpz=-jumpz end if if(jumpz==0) then if(jumpy<0) then jumpx=-jumpx ; jumpy=-jumpy end if if(jumpy==0.and.jumpx<0) jumpx=-jumpx end if if(triad4.and.ivar_start==ivar_end) then call what_color_is_triad(jumpx,jumpy,icolor,3_i_long) else call what_color_is(jumpx,jumpy,jumpz,icolor) end if lhexadx(i,j,k,icolor)=jumpx ; lhexady(i,j,k,icolor)=jumpy ; lhexadz(i,j,k,icolor)=jumpz aspect(icolor,i,j,k)=whexad8(kk) end if end do lguess=1 end do end do end do ! big loop over all colors do icolor=1,7 ! get all string starting addresses and lengths m=0 do k=kps,kpe ivar=idvar(k) ivar_start=kvar_start(ivar) do j=jps,jpe do i=ips,ipe jumpz=lhexadz(i,j,k,icolor) km=k-jumpz ! note jumpz always >= 0 by construction jumpx=lhexadx(i,j,k,icolor) ; jumpy=lhexady(i,j,k,icolor) if(km0) then if(nstrings>0) then do m=1,nstrings len=1 jumpx=i1filter(1,m) ; jumpy=i1filter(2,m) ; jumpz=i1filter(3,m) ivar=i2filter(5,m) jumpxmax(ivar)=max(jumpx,jumpxmax(ivar)) jumpymax(ivar)=max(jumpy,jumpymax(ivar)) jumpzmax(ivar)=max(jumpz,jumpzmax(ivar)) jumpxmin(ivar)=min(jumpx,jumpxmin(ivar)) jumpymin(ivar)=min(jumpy,jumpymin(ivar)) jumpzmin(ivar)=min(jumpz,jumpzmin(ivar)) i=i2filter(1,m) ; j=i2filter(2,m) ; k=i2filter(3,m) ivar_end=kvar_end(ivar) do lentest=len+1 ktest=k+jumpz if(ktest>ivar_end) then i2filter(4,m)=len npoints_send=npoints_send+len exit end if itest=max(ips-1,min(i+jumpx,ipe+1)) jtest=max(jps-1,min(j+jumpy,jpe+1)) ktest=max(kps-1,min(ktest ,kpe+1)) if(jumpx/=lhexadx(itest,jtest,ktest,icolor).or. & jumpy/=lhexady(itest,jtest,ktest,icolor).or. & jumpz/=lhexadz(itest,jtest,ktest,icolor)) then i2filter(4,m)=len npoints_send=npoints_send+len exit end if len=lentest i=itest ; j=jtest ; k=ktest end do end do end if ! Begin computation of alpha, beta, the recursive filter coefficients. ! It is necessary to have contiguous strings for this process, so first ! must label strings and assign them to processors so the load is evenly distributed ! when string pieces are gathered together into full strings ! assign global label, origin and destination pe number to each string piece ! (global label is starting i,j,k closest to edge of global domain) call string_label(i1filter,i2filter,nstrings,label_string,npoints_recv, & nvars,kvar_start,kvar_end, & ids, ide, jds, jde, kds, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe, & ! patch indices mype,npes) filter(icolor)%npointsmax=max(npoints_recv(mype),npoints_send) if(npes==1) then filter(icolor)%npointsmaxall=filter(icolor)%npointsmax else call mpi_allreduce(filter(icolor)%npointsmax, & filter(icolor)%npointsmaxall,1,mpi_integer4,mpi_max,mpi_comm_world,ierr) end if filter(icolor)%npoints_send=npoints_send filter(icolor)%npoints_recv=npoints_recv(mype) allocate(info_string(8,max(1,npoints_recv(mype)))) allocate(aspect_full(max(1,npoints_recv(mype)))) ! assemble full strings deallocate(filter(icolor)%nsend,stat=istat) deallocate(filter(icolor)%ndsend,stat=istat) deallocate(filter(icolor)%nrecv,stat=istat) deallocate(filter(icolor)%ndrecv,stat=istat) deallocate(filter(icolor)%ia,stat=istat) deallocate(filter(icolor)%ja,stat=istat) deallocate(filter(icolor)%ka,stat=istat) allocate(filter(icolor)%nsend(0:npes-1)) allocate(filter(icolor)%ndsend(0:npes)) allocate(filter(icolor)%nrecv(0:npes-1)) allocate(filter(icolor)%ndrecv(0:npes)) allocate(filter(icolor)%ia(max(1,npoints_send))) allocate(filter(icolor)%ja(max(1,npoints_send))) allocate(filter(icolor)%ka(max(1,npoints_send))) call string_assemble4(i1filter,i2filter,nstrings,label_string, & npoints_send,npoints_recv(mype),aspect,icolor, & info_string,aspect_full, & filter(icolor)%nsend,filter(icolor)%ndsend, & filter(icolor)%nrecv,filter(icolor)%ndrecv, & filter(icolor)%ia,filter(icolor)%ja,filter(icolor)%ka, & ips,ipe,jps,jpe,kps,kpe,mype,npes) ! organize full strings for processing deallocate(filter(icolor)%ib,stat=istat) allocate(filter(icolor)%ib(max(1,npoints_recv(mype)))) if(npoints_recv(mype)>0) then call sort_strings4(info_string,aspect_full,npoints_recv(mype),filter(icolor)%ib,nvars) ! count number of strings call count_strings(info_string,filter(icolor)%nstrings,nstrings_var,nvars,npoints_recv(mype)) ! compute desired alpha and beta for final filter deallocate(filter(icolor)%istart,stat=istat) allocate(filter(icolor)%istart(filter(icolor)%nstrings+1)) if(filter(1)%new_factorization) then deallocate(filter(icolor)%fmat,stat=istat) deallocate(filter(icolor)%fmat0,stat=istat) allocate(filter(icolor)%fmat(2,npoints_recv(mype),npass,ngauss,2)) allocate(filter(icolor)%fmat0(npoints_recv(mype),npass,ngauss,2)) do igauss=1,ngauss call new_alpha_beta4(info_string,aspect_full,rgauss(igauss), & filter(icolor)%fmat,filter(icolor)%fmat0,igauss,ngauss, & filter(icolor)%istart,npoints_recv(mype),binomial,npass, & lenbar,lenmax,lenmin,npoints1,nvars) end do else deallocate(filter(icolor)%lnf,stat=istat) deallocate(filter(icolor)%bnf,stat=istat) allocate(filter(icolor)%lnf(ifilt_ord,npoints_recv(mype),npass,ngauss)) allocate(filter(icolor)%bnf(npoints_recv(mype),npass,ngauss)) do igauss=1,ngauss call alpha_beta4(info_string,aspect_full,rgauss(igauss), & filter(icolor)%lnf,filter(icolor)%bnf,igauss,ngauss, & filter(icolor)%istart,npoints_recv(mype),binomial,npass, & ifilt_ord,lenbar,lenmax,lenmin,npoints1,nvars) end do end if end if deallocate(info_string) deallocate(aspect_full) else filter(icolor)%npoints_send=0 ! here if no strings for this color filter(icolor)%npoints_recv=0 filter(icolor)%npointsmax=0 filter(icolor)%npointsmaxall=0 end if if(npes==1) then lenbarall(:,icolor)=lenbar nstrings_varall(:,icolor)=nstrings_var lenmaxall(:,icolor)=lenmax lenminall(:,icolor)=lenmin totalpoints1(:,icolor)=npoints1 jumpxmaxall(:,icolor)=jumpxmax jumpymaxall(:,icolor)=jumpymax jumpzmaxall(:,icolor)=jumpzmax jumpxminall(:,icolor)=jumpxmin jumpyminall(:,icolor)=jumpymin jumpzminall(:,icolor)=jumpzmin else call mpi_reduce(lenbar,lenbarall(1,icolor),nvars,mpi_real8,mpi_sum,0,mpi_comm_world,ierr) call mpi_reduce(nstrings_var,nstrings_varall(1,icolor),nvars, & mpi_integer4,mpi_sum,0,mpi_comm_world,ierr) call mpi_reduce(lenmax,lenmaxall(1,icolor),nvars,mpi_integer4,mpi_max,0,mpi_comm_world,ierr) call mpi_reduce(lenmin,lenminall(1,icolor),nvars,mpi_integer4,mpi_min,0,mpi_comm_world,ierr) call mpi_reduce(npoints1,totalpoints1(1,icolor),nvars,mpi_integer4,mpi_sum,0,mpi_comm_world,ierr) call mpi_reduce(jumpxmax,jumpxmaxall(1,icolor),nvars,mpi_integer4,mpi_max,0,mpi_comm_world,ierr) call mpi_reduce(jumpymax,jumpymaxall(1,icolor),nvars,mpi_integer4,mpi_max,0,mpi_comm_world,ierr) call mpi_reduce(jumpzmax,jumpzmaxall(1,icolor),nvars,mpi_integer4,mpi_max,0,mpi_comm_world,ierr) call mpi_reduce(jumpxmin,jumpxminall(1,icolor),nvars,mpi_integer4,mpi_min,0,mpi_comm_world,ierr) call mpi_reduce(jumpymin,jumpyminall(1,icolor),nvars,mpi_integer4,mpi_min,0,mpi_comm_world,ierr) call mpi_reduce(jumpzmin,jumpzminall(1,icolor),nvars,mpi_integer4,mpi_min,0,mpi_comm_world,ierr) end if end do ! end big loop over all colors ! print out diagnostics for each variable if(mype==0 .and. print_verbose) then do ivar=1,nvars write(6,*)' STRING STATS FOLLOW FOR VARIABLE #',ivar,': ',trim(var_names(ivar)) do icolor=1,7 if(nstrings_varall(ivar,icolor).gt.0) then totalpoints=nint(lenbarall(ivar,icolor)) lenbarall(ivar,icolor)=lenbarall(ivar,icolor)/nstrings_varall(ivar,icolor) write(6,*)' string stats for ivar,icolor=',ivar,icolor write(6,'(" ave string length=",f8.2)')lenbarall(ivar,icolor) write(6,'(" max string length=",i8)')lenmaxall(ivar,icolor) write(6,'(" min string length=",i8)')lenminall(ivar,icolor) write(6,'(" total num strings=",i9)')nstrings_varall(ivar,icolor) write(6,'(" total num points=",i9)')totalpoints write(6,'(" num len 1 strings=",i9)')totalpoints1(ivar,icolor) write(6,'(" jumpxmin,max=",2i12)')jumpxminall(ivar,icolor),jumpxmaxall(ivar,icolor) write(6,'(" jumpymin,max=",2i12)')jumpyminall(ivar,icolor),jumpymaxall(ivar,icolor) write(6,'(" jumpzmin,max=",2i12)')jumpzminall(ivar,icolor),jumpzmaxall(ivar,icolor) end if end do end do end if ! call adjoint_check4(filter,ngauss,ips,ipe,jps,jpe,kps,kpe,mype,npes) call normalize2_raf4(filter,filter(1)%ngauss,normal, & ids, ide, jds, jde, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe, & ! patch indices mype, npes) !DEBUG test output ldebug=.false. if(ldebug) then if(mype==0 .and. print_verbose) write(6,*) 'normalization finished' allocate(rout (ips:ipe,jps:jpe)) allocate(routa(ips:ipe,jps:jpe)) if(mype==0) open(32,form='unformatted') do k=kds,kde if(k>=kps.and.k<=kpe) then kk=k-kps+1 do j=jps,jpe do i=ips,ipe rout(i,j)=filter(1)%amp(1,i,j,k) end do end do else rout=zero_single end if call mpi_reduce(rout,routa,(ipe-ips+1)*(jpe-jps+1), & mpi_real4,mpi_sum,0,mpi_comm_world,ierr) if(mype==0 .and. print_verbose) then write(32) routa write(6,*) 'amp:',k,maxval(routa),minval(routa) end if end do if(mype==0) close(32) deallocate(rout) deallocate(routa) end if !DEBUG return end subroutine init_raf4 subroutine normalize2_raf4(filter,ngauss,normal, & ids, ide, jds, jde, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe, & ! patch indices mype, npes) !$$$ subprogram documentation block ! . . . ! subprogram: normalize2_raf4 ! ! prgrmmr: ! ! abstract: ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused vars ! ! input argument list: ! ngauss - ! normal - ! ids, ide, jds, jde, kde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! mype - mpi task id ! npes - ! filter ! ! output argument list: ! filter ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ids, ide, jds, jde, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: & ngauss,normal,mype, npes TYPE(filter_cons),INTENT(INOUT) :: filter(7) real(r_single) ranvec(2, ngauss,ips:ipe, jps:jpe, kps:kpe ) real(r_single) bigg( ngauss,ips:ipe, jps:jpe, kps:kpe ) integer(i_long) i,igauss,j,k,loop,nsamples,ierror real(r_single) this_one1,this_one2 integer(i_long) istat,ii integer(i_llong) jj,j1,j2,j3 integer(i_byte) flips(2**27-8) logical:: independent_of_npes,print_verbose print_verbose = .false. if(verbose)print_verbose=.true. deallocate(filter(1)%amp,stat=istat) allocate(filter(1)%amp(ngauss,ips:ipe,jps:jpe,kps:kpe)) if(normal==0) then filter(1)%amp=one return end if ! read in fixed set of random flips if(mype==0) then open(997433,file='random_flips',form='unformatted') read(997433) flips close(997433) end if call mpi_bcast(flips,2**27-8,mpi_integer1,0,mpi_comm_world,ierror) nsamples=abs(normal)/2 independent_of_npes=normal<0 bigg=zero ii=-1 if(independent_of_npes) then; jj= 0 else; jj=(2**27-8)*mype/npes end if do loop=1,nsamples if(mype==0.and.mod(loop-1,10_i_long)==0 .and. print_verbose) write(6,*) 'normalization step:',loop,'/',nsamples j1=(int(loop,i_llong)-1)*kde ranvec=zero_single do k=kps,kpe j2=(j1+k-1)*jde do j=jps,jpe j3=(j2+j-1)*ide do i=ips,ipe if(independent_of_npes) then jj=(j3+i-1)*2+8 ii=mod(jj,8_i_llong) jj=mod(jj/8_i_llong,2_i_llong**27-8) else ii=mod(ii+1,8_i_long) if(ii==0) jj=jj+1_i_llong if(jj>2**27-8_i_llong) jj=1_i_llong end if this_one1=-one if(btest(flips(jj),ii)) this_one1=one ii=mod(ii+1,8_i_long) if(.not.independent_of_npes) then if(ii==0) jj=jj+1_i_llong if(jj>2**27-8_i_llong) jj=1_i_llong end if this_one2=-one if(btest(flips(jj),ii)) this_one2=one do igauss=1,ngauss ranvec(1,igauss,i,j,k)=this_one1 ranvec(2,igauss,i,j,k)=this_one2 end do end do end do end do call rad_sm24_ad(ranvec,filter,ngauss,ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) do k=kps,kpe do j=jps,jpe do i=ips,ipe do igauss=1,ngauss bigg(igauss,i,j,k)=bigg(igauss,i,j,k)+ranvec(1,igauss,i,j,k)**2+ranvec(2,igauss,i,j,k)**2 end do end do end do end do end do do k=kps,kpe do j=jps,jpe do i=ips,ipe do igauss=1,ngauss filter(1)%amp(igauss,i,j,k)=one/sqrt(bigg(igauss,i,j,k)/(two*nsamples)) end do end do end do end do end subroutine normalize2_raf4 subroutine one_color4(g,filter,ngauss,ipass,ifilt_ord, & nstrings,istart,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: one_color4 ! ! prgrmmr: ! ! abstract: apply one forward-backward recursive filter for one color ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused vars ! ! input argument list: ! g - input--field on grid, output--filtered field on grid ! filter - ! ngauss - ! ipass - total number of contiguous string points ! ifilt_ord - ! nstrings - ! istart - ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ! output argument list: ! g - input--field on grid, output--filtered field on grid ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ips, ipe, jps, jpe, kps, kpe INTEGER(i_long) ,INTENT(IN ) :: & npes,ngauss INTEGER(i_long) ,INTENT(IN ) :: & ipass ! total number of contiguous string points integer(i_long) ,intent(in ) :: ifilt_ord real(r_single), DIMENSION( ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field on grid, output--filtered field on grid integer(i_long) ,intent(in ) :: nstrings integer(i_long) ,intent(in ) :: istart(nstrings+1) type(filter_cons) ,intent(in ) :: filter real(r_single) work(ngauss,max(1,filter%npointsmax),2) real(r_single) work2(max(1,filter%npointsmax)) integer(i_long) i,ierr,igauss,j,l,mpi_string !-- gather up strings call mpi_type_contiguous(ngauss,mpi_real4,mpi_string,ierr) call mpi_type_commit(mpi_string,ierr) if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss work(igauss,i,1)=g(igauss,filter%ia(i),filter%ja(i),filter%ka(i)) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,i,2)=work(igauss,i,1) end do end do else call mpi_alltoallv(work(1,1,1),filter%nsend,filter%ndsend,mpi_string, & work(1,1,2),filter%nrecv,filter%ndrecv,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_recv>0) then do igauss=1,ngauss do i=1,filter%npoints_recv work2(i)=work(igauss,filter%ib(i),2) end do do j=1,nstrings do i=istart(j)+1,istart(j+1)-1 do l=1,min(ifilt_ord,i-istart(j)) work2(i)=work2(i)-filter%lnf(l,i,ipass,igauss)*work2(i-l) end do end do do i=istart(j),istart(j+1)-1 work2(i)=filter%bnf(i,ipass,igauss)*work2(i) end do do i=istart(j+1)-2_i_long,istart(j),-1 do l=1,min(ifilt_ord,istart(j+1)-i-1) work2(i)=work2(i)-filter%lnf(l,i+l,ipass,igauss)*work2(i+l) end do end do end do do i=1,filter%npoints_recv work(igauss,i,1)=work2(i) end do end do !-- send strings back do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,filter%ib(i),2)=work(igauss,i,1) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,i,1)=work(igauss,i,2) end do end do else call mpi_alltoallv(work(1,1,2),filter%nrecv,filter%ndrecv,mpi_string, & work(1,1,1),filter%nsend,filter%ndsend,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss g(igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(igauss,i,1) end do end do end if call mpi_type_free(mpi_string,ierr) end subroutine one_color4 subroutine one_color24(g,filter,ngauss,ipass,ifilt_ord, & nstrings,istart,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: one_color24 ! ! prgrmmr: ! ! abstract: apply one forward-backward recursive filter for one color ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused vars ! ! input argument list: ! g - input--field on grid, output--filtered field on grid ! filter - ! ngauss - ! ipass - total number of contiguous string points ! ifilt_ord - ! nstrings - ! istart - ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ! output argument list: ! g - input--field on grid, output--filtered field on grid ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: & ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: & npes,ngauss INTEGER(i_long) ,INTENT(IN ) :: & ipass ! total number of contiguous string points integer(i_long) ,intent(in ) :: ifilt_ord real(r_single), DIMENSION(2,ngauss, ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field on grid, output--filtered field on grid integer(i_long) ,intent(in ) :: nstrings integer(i_long) ,intent(in ) :: istart(nstrings+1) type(filter_cons) ,intent(in ) :: filter real(r_single) work(2,ngauss,max(1,filter%npointsmax),2) real(r_single) work2(max(1,filter%npointsmax)) integer(i_long) i,ierr,igauss,ii,j,l,mpi_string !-- gather up strings call mpi_type_contiguous(2*ngauss,mpi_real4,mpi_string,ierr) call mpi_type_commit(mpi_string,ierr) if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss work(1,igauss,i,1)=g(1,igauss,filter%ia(i),filter%ja(i),filter%ka(i)) work(2,igauss,i,1)=g(2,igauss,filter%ia(i),filter%ja(i),filter%ka(i)) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,i,2)=work(1,igauss,i,1) work(2,igauss,i,2)=work(2,igauss,i,1) end do end do else call mpi_alltoallv(work(1,1,1,1),filter%nsend,filter%ndsend,mpi_string, & work(1,1,1,2),filter%nrecv,filter%ndrecv,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_recv>0) then do igauss=1,ngauss do ii=1,2 do i=1,filter%npoints_recv work2(i)=work(ii,igauss,filter%ib(i),2) end do do j=1,nstrings do i=istart(j)+1,istart(j+1)-1 do l=1,min(ifilt_ord,i-istart(j)) work2(i)=work2(i)-filter%lnf(l,i,ipass,igauss)*work2(i-l) end do end do do i=istart(j),istart(j+1)-1 work2(i)=filter%bnf(i,ipass,igauss)*work2(i) end do do i=istart(j+1)-2,istart(j),-1 do l=1,min(ifilt_ord,istart(j+1)-i-1) work2(i)=work2(i)-filter%lnf(l,i+l,ipass,igauss)*work2(i+l) end do end do end do do i=1,filter%npoints_recv work(ii,igauss,i,1)=work2(i) end do end do end do !-- send strings back do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,filter%ib(i),2)=work(1,igauss,i,1) work(2,igauss,filter%ib(i),2)=work(2,igauss,i,1) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,i,1)=work(1,igauss,i,2) work(2,igauss,i,1)=work(2,igauss,i,2) end do end do else call mpi_alltoallv(work(1,1,1,2),filter%nrecv,filter%ndrecv,mpi_string, & work(1,1,1,1),filter%nsend,filter%ndsend,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss g(1,igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(1,igauss,i,1) g(2,igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(2,igauss,i,1) end do end do end if call mpi_type_free(mpi_string,ierr) end subroutine one_color24 SUBROUTINE raf4(g,filter,ngauss,ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf4 ! ! prgrmmr: ! ! abstract: 1st half of recursive anisotropic self-adjoint filter (full-strings version) ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused uses ! ! input argument list: ! g - input--field on grid, output--filtered field on grid ! filter - ! ngauss - ! ids, ide, jds, jde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ! output argument list: ! g - input--field on grid, output--filtered field on grid ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ids,ide,jds,jde, & ips,ipe,jps,jpe,kps,kpe INTEGER(i_long) ,INTENT(IN ) :: npes,ngauss real(r_single), DIMENSION(ngauss, ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field to be filtered, output--filtered field TYPE(filter_cons) ,intent(in ) :: filter(7) ! structure defining recursive filter integer(i_long) i,icolor,ipass,igauss,j,k,iadvance,iback ! multiply by amp first do k=kps,kpe do j=jps,jpe do i=ips,ipe do igauss=1,ngauss g(igauss,i,j,k)=filter(1)%amp(igauss,i,j,k)*g(igauss,i,j,k) end do end do end do end do ! apply smoothing if nsmooth or nsmooth_shapiro > 0 if(kps<=kpe) then do i=1,max(filter(1)%nsmooth,filter(1)%nsmooth_shapiro) call smther(filter(1),g,filter(1)%nrows,ngauss,filter(1)%nsmooth,filter(1)%nsmooth_shapiro, & ids,ide,jds,jde,ips,ipe,jps,jpe,kps,kpe, & npes,filter(1)%gnorm_halox,filter(1)%gnorm_haloy,.false.) end do end if if(filter(1)%npass>0) then do ipass=1,filter(1)%npass do icolor=7,1,-1 if(filter(icolor)%npointsmaxall>0) then if(filter(1)%new_factorization) then iadvance=1 iback=2_i_long call one_color4_new_factorization(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart, & ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) else call one_color4(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart,ips,ipe,jps,jpe,kps,kpe,npes) end if end if end do end do end if end subroutine raf4 SUBROUTINE raf_sm4(g,filter,ngauss,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . ! subprogram: raf_sm4 ! ! prgrmmr: ! ! abstract: 1st half of recursive anisotropic self-adjoint filter (full-strings version) ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused uses ! ! input argument list: ! g - input--field on grid, output--filtered field on grid ! filter - ! ngauss - ! ips, ipe, jps, jpe, kps, kpe - patch indices ! npes - ! ! output argument list: ! g - input--field on grid, output--filtered field on grid ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: & npes,ngauss real(r_single), DIMENSION( ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field to be filtered, output--filtered field TYPE(filter_cons) ,intent(in ) :: filter(7) ! structure defining recursive filter integer(i_long) icolor,ipass,iadvance,iback if(filter(1)%npass>0) then do ipass=1,filter(1)%npass do icolor=7,1,-1 if(filter(icolor)%npointsmaxall>0) then if(filter(1)%new_factorization) then iadvance=1 iback=2_i_long call one_color4_new_factorization(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart, & ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) else call one_color4(g,filter(icolor),ngauss,ipass,filter(1)%ifilt_ord, & filter(icolor)%nstrings,filter(icolor)%istart,ips,ipe,jps,jpe,kps,kpe,npes) end if end if end do end do end if end subroutine raf_sm4 subroutine sort_strings4(info_string,aspect_full,npoints_recv,ib,nvars) !$$$ subprogram documentation block ! . . . ! subprogram: sort_strings4 ! ! prgrmmr: ! ! abstract: ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! info_string - 1 - distance from origin to current point ! 2,3,4 - origin coordinates ! 5,6,7,8 - jumpx,jumpy,jumpz,ivar for this string ! aspect_full - ! npoints_recv - ! ! output argument list: ! info_string - ! aspect_full - ! ib ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block IMPLICIT NONE integer(i_long) ,intent(in ) :: nvars INTEGER(i_long) ,INTENT(IN ) :: npoints_recv INTEGER(i_short), DIMENSION( 8, npoints_recv ),INTENT(INOUT) :: & info_string ! 1---- distance from origin to current point ! 2,3,4-- origin coordinates ! 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string real(r_single) , DIMENSION( npoints_recv ) ,INTENT(INOUT) :: & aspect_full integer(i_long) ,intent( out) :: ib(npoints_recv) integer(i_long) ib0(npoints_recv),ib1(npoints_recv) integer(i_llong) ij_origin(npoints_recv) integer(i_short) info2(8,npoints_recv) integer(i_long) icountvar(nvars),istartvar(nvars) real(r_single) aspect2(npoints_recv) integer(i_long) i,j integer(i_llong) idist,idistlen,idistmax,idistmin,idjxlen,idjxylen,idjxyzlen,idjxyzx0len,idjxyzxy0len integer(i_long) ii,k integer(i_llong) ix0,ix0len,ix0max,ix0min,iy0,iy0len,iy0max,iy0min,iz0,iz0len,iz0max integer(i_llong) iz0min,jumpx,jumpxlen,jumpxmax,jumpxmin,jumpy,jumpylen,jumpymax,jumpymin integer(i_llong) jumpz,jumpzlen,jumpzmax,jumpzmin ! sort by var first (but not with indexxi8, because destroys order of points within a subgroup) ii=0 icountvar=0 istartvar=0 ib0=0 do k=1,nvars do i=1,npoints_recv if(info_string(8,i)==k) then icountvar(k)=icountvar(k)+1 ii=ii+1 if(icountvar(k)==1) istartvar(k)=ii ib0(ii)=i do j=1,8 info2(j,ii)=info_string(j,i) end do aspect2(ii)=aspect_full(i) end if end do end do if(minval(ib0)==0) then write(6,*)' error in sort_strings4, problem with ib0' call stop2(68) end if ! now sort each section in usual way ! obtain range of jumpx,jumpy,originx,originy ij_origin=0 do k=1,nvars if(icountvar(k)==0) cycle jumpxmin=huge(jumpxmin) ; jumpxmax=-jumpxmin jumpymin=jumpxmin ; jumpymax=jumpxmax jumpzmin=jumpxmin ; jumpzmax=jumpxmax ix0min=jumpxmin ; ix0max=jumpxmax iy0min=jumpxmin ; iy0max=jumpxmax iz0min=jumpxmin ; iz0max=jumpxmax idistmin=jumpxmin ; idistmax=jumpxmax do i=istartvar(k),istartvar(k)+icountvar(k)-1 jumpx=info2(5,i) ; jumpy=info2(6,i) ; jumpz=info2(7,i) ix0=info2(2,i) ; iy0=info2(3,i) ; iz0=info2(4,i) idist=info2(1,i) jumpxmin=min(jumpx,jumpxmin) ; jumpxmax=max(jumpx,jumpxmax) jumpymin=min(jumpy,jumpymin) ; jumpymax=max(jumpy,jumpymax) jumpzmin=min(jumpz,jumpzmin) ; jumpzmax=max(jumpz,jumpzmax) ix0min=min(ix0,ix0min) ; ix0max=max(ix0,ix0max) iy0min=min(iy0,iy0min) ; iy0max=max(iy0,iy0max) iz0min=min(iz0,iz0min) ; iz0max=max(iz0,iz0max) idistmin=min(idist,idistmin) ; idistmax=max(idist,idistmax) end do jumpxlen=jumpxmax-jumpxmin+1 ; jumpylen=jumpymax-jumpymin+1 ; jumpzlen=jumpzmax-jumpzmin+1 idistlen=idistmax-idistmin+1 ix0len=ix0max-ix0min+1 ; iy0len=iy0max-iy0min+1 ; iz0len=iz0max-iz0min+1 idjxlen=idistlen*jumpxlen idjxylen=idjxlen*jumpylen idjxyzlen=idjxylen*jumpzlen idjxyzx0len=idjxyzlen*ix0len idjxyzxy0len=idjxyzx0len*iy0len do i=istartvar(k),istartvar(k)+icountvar(k)-1 jumpx=info2(5,i) ; jumpy=info2(6,i) ; jumpz=info2(7,i) ix0=info2(2,i) ; iy0=info2(3,i) ; iz0=info2(4,i) idist=info2(1,i) ij_origin(i)=idist-idistmin & +idistlen*(jumpx-jumpxmin) & +idjxlen*(jumpy-jumpymin) & +idjxylen*(jumpz-jumpzmin) & +idjxyzlen*(ix0-ix0min) & +idjxyzx0len*(iy0-iy0min) & +idjxyzxy0len*(iz0-iz0min) end do call indexxi8(icountvar(k),ij_origin(istartvar(k)),ib1(istartvar(k))) do i=istartvar(k),istartvar(k)+icountvar(k)-1 ib1(i)=ib1(i)+istartvar(k)-1 end do end do do i=1,npoints_recv ib(i)=ib0(ib1(i)) end do do i=1,npoints_recv do j=1,8 info_string(j,i)=info2(j,ib1(i)) end do aspect_full(i)=aspect2(ib1(i)) end do end subroutine sort_strings4 SUBROUTINE string_assemble4(i1filter,i2filter,nstrings,label_string, & npoints_send,npoints_recv,aspect,icolor, & info_string,aspect_full,nsend,ndsend,nrecv,ndrecv,ia,ja,ka, & ips,ipe,jps,jpe,kps,kpe,mype,npes) !$$$ subprogram documentation block ! . . . ! subprogram: string_assemble4 ! ! prgrmmr: ! ! abstract: ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! i1filter - i1filter(1-3,.)=jumpx,jumpy,jumpz ! i2filter - i2filter(1-5,.)=beginx,beginy,beginz,lenstring,ivar ! nstrings - ! label_string - label_string(1-3,.)=originx,originy,originz ! npoints_send - number of points to send for assembling strings ! npoints_recv - number of points for assembled strings ! aspect - aspect tensor numbers ! icolor - ! ips, ipe, jps, jpe, kps, kpe - patch indices ! mype - mpi task id ! npes - ! ! output argument list: ! info_string - 1---- distance from origin to current point ! - 2,3,4-- origin coordinates ! - 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string ! aspect_full - ! nsend,ndsend,nrecv,ndrecv ! ia,ja,ka ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: mype, npes INTEGER(i_long) ,INTENT(IN ) :: nstrings,icolor INTEGER(i_short), DIMENSION( 3, * ) ,INTENT(IN ) :: & i1filter ! i1filter(1-3,.)=jumpx,jumpy,jumpz INTEGER(i_short), DIMENSION( 5, * ) ,INTENT(IN ) :: & i2filter ! i2filter(1-5,.)=beginx,beginy,beginz,lenstring,ivar INTEGER(i_short), DIMENSION( 6, * ) ,INTENT(IN ) :: & label_string ! label_string(1-3,.)=originx,originy,originz ! label_string(4,.)=distance from origin to start of string ! label_string(5-6,.)=dest pe, ivar INTEGER(i_long) ,INTENT(IN ) :: & npoints_send, & ! number of points to send for assembling strings npoints_recv ! number of points for assembled strings real(r_single) , DIMENSION( 7, ips:ipe, jps:jpe, kps:kpe ),INTENT(IN ) :: & aspect ! aspect tensor numbers (recursive filter parameters derived ! from these) INTEGER(i_short), DIMENSION( 8, max(1,npoints_recv) ) ,INTENT( OUT) :: & info_string ! 1---- distance from origin to current point ! 2,3,4-- origin coordinates ! 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string real(r_single), DIMENSION( max(1,npoints_recv) ) ,INTENT( OUT) :: & aspect_full integer(i_long) ,intent( out) :: & nsend(0:npes-1),ndsend(0:npes),nrecv(0:npes-1),ndrecv(0:npes) integer(i_short) ,intent( out) :: ia(npoints_send),ja(npoints_send),ka(npoints_send) integer(i_short) string_info(8,npoints_send) real(r_single) full_aspect(npoints_send) integer(i_long) i,i0,idestpe,idist,ierr,ivar,j,j0,jumpx,jumpy,jumpz,k,k0,kk,len,m,mbuf,mpe,mpi_string1 integer(i_long) idestlast mbuf=0 ! setup string_info array nsend=0 if(nstrings>0) then idestlast=-1 do m=1,nstrings len=i2filter(4,m) jumpx=i1filter(1,m) ; jumpy=i1filter(2,m) ; jumpz=i1filter(3,m) i=i2filter(1,m) ; j=i2filter(2,m) ; k=i2filter(3,m) ; ivar=i2filter(5,m) i0=label_string(1,m) ; j0=label_string(2,m) ; k0=label_string(3,m) idist=label_string(4,m) idestpe=label_string(5,m) if(idestpe=0) idist=idist+1 end do end do end if if(mbuf/=npoints_send) then write(6,*)'STRING_ASSEMBLE4: ***PROBLEM*** mbuf /= npoints_send, mype,mbuf,npoints_send=', & mype,mbuf,npoints_send call stop2(68) end if ! now get remaining info necessary for using alltoall command ndsend(0)=0 do mpe=1,npes ndsend(mpe)=ndsend(mpe-1)+nsend(mpe-1) end do if(npes==1) then nrecv(0)=nsend(0) else call mpi_alltoall(nsend,1,mpi_integer, & nrecv,1,mpi_integer,mpi_comm_world,ierr) end if ndrecv(0)=0 do mpe=1,npes ndrecv(mpe)=ndrecv(mpe-1)+nrecv(mpe-1) end do if(npes==1) then do j=1,max(1,npoints_recv) do i=1,8 info_string(i,j)=string_info(i,j) end do aspect_full(j)=full_aspect(j) end do else call mpi_type_contiguous(8,mpi_integer2,mpi_string1,ierr) call mpi_type_commit(mpi_string1,ierr) call mpi_alltoallv(string_info,nsend,ndsend,mpi_string1, & info_string,nrecv,ndrecv,mpi_string1,mpi_comm_world,ierr) call mpi_type_free(mpi_string1,ierr) call mpi_alltoallv(full_aspect,nsend,ndsend,mpi_real4, & aspect_full,nrecv,ndrecv,mpi_real4,mpi_comm_world,ierr) end if end subroutine string_assemble4 SUBROUTINE string_label(i1filter,i2filter,nstrings,label_string,npoints_recv, & nvars,kvar_start,kvar_end, & ids, ide, jds, jde, kds, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe, & ! patch indices mype, npes) !$$$ subprogram documentation block ! . . . ! subprogram: string_label ! ! prgrmmr: ! ! abstract: assign global string labels to each string ! (global label is first i,j,k inside global domain) ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block, rm unused uses ! ! input argument list: ! i1filter - i1filter(1-3,.)=jumpx,jumpy,jumpz ! i2filter - i2filter(1-5,.)=beginx,beginy,beginz,lenstring,ivar ! nstrings - ! npoints_recv - number of points for assembled strings ! nvars ! kvar_start,kvar_end ! ids, ide, jds, jde, kds, kde - domain indices ! ips, ipe, jps, jpe, kps, kpe - patch indices ! mype - mpi task id ! npes - ! ! output argument list: ! i1filter - i1filter(1-3,.)=jumpx,jumpy,jumpz ! i2filter - i2filter(1-5,.)=beginx,beginy,beginz,lenstring,ivar ! label_string - label_string(1-3,.)=originx,originy,originz ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: ids, ide, jds, jde, kds, kde, & ! domain indices ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,intent(in ) :: nstrings INTEGER(i_short), DIMENSION( 3, * ),INTENT(INout) :: & i1filter ! i1filter(1-3,.)=jumpx,jumpy,jumpz INTEGER(i_short), DIMENSION( 5, * ),INTENT(INout) :: & i2filter ! i2filter(1-5,.)=beginx,beginy,beginz,lenstring,ivar INTEGER(i_short), DIMENSION( 6, * ),INTENT( OUT) :: & label_string ! label_string(1-3,.)=originx,originy,originz ! label_string(4,.)=distance from origin to start of string ! label_string(5,.)=destination pe for string piece ! label_string(6,.)=ivar integer(i_long) ,intent(in ) :: mype,npes integer(i_long) ,intent( out) :: npoints_recv(0:npes-1) integer(i_long) ,intent(in ) :: nvars integer(i_long) ,intent(in ) :: kvar_start(nvars),kvar_end(nvars) integer(i_long) i,idist,idisttest,ierr,istring_pe,itest,ivar,ivar_end,ivar_start integer(i_long) j,jtest,jumpx,jumpy,jumpz,k,ktest,mpe,n,nstrings0 integer(i_llong) lastlabel INTEGER(i_short), DIMENSION( 6, (ipe-ips+1)*(jpe-jps+1)*(kpe-kps+1) ) :: label_string2 INTEGER(i_short), DIMENSION( 3, (ipe-ips+1)*(jpe-jps+1)*(kpe-kps+1) ) :: i1filter2 INTEGER(i_short), DIMENSION( 5, (ipe-ips+1)*(jpe-jps+1)*(kpe-kps+1) ) :: i2filter2 integer(i_llong) labelijk(max(1,nstrings)) integer(i_long) nrecv(0:npes-1),ndrecv(0:npes) integer(i_llong),allocatable::labelijk0(:) integer(i_long),allocatable::index(:) if(nstrings>0) then do n=1,nstrings jumpx=i1filter(1,n) ; jumpy=i1filter(2,n) ; jumpz=i1filter(3,n) i=i2filter(1,n) ; j=i2filter(2,n) ; k=i2filter(3,n) ; ivar=i2filter(5,n) ivar_start=kvar_start(ivar) ivar_end=kvar_end(ivar) idist=0 do idisttest=idist+1 ktest=k-jumpz if(ktestide) then label_string2(1,n)=i ; label_string2(2,n)=j ; label_string2(3,n)=k label_string2(4,n)=idist ; label_string2(6,n)=ivar exit end if jtest=j-jumpy if(jtestjde) then label_string2(1,n)=i ; label_string2(2,n)=j ; label_string2(3,n)=k label_string2(4,n)=idist ; label_string2(6,n)=ivar exit end if i=itest ; j=jtest ; k=ktest idist=idisttest end do ivar=label_string2(6,n) labelijk(n)=label_string2(1,n)+(ide-ids+1)*(label_string2(2,n)-1+(jde-jds+1)*(label_string2(3,n)-1 & +(kde-kds+1)*(ivar-1))) end do end if !-- assemble all string labels to pe 0, for assignment of pe numbers nrecv=0 if(npes==1) then nrecv(0)=nstrings else call mpi_allgather(nstrings,1,mpi_integer4,nrecv,1,mpi_integer4,mpi_comm_world,ierr) end if ndrecv(0)=0 do i=1,npes ndrecv(i)=ndrecv(i-1)+nrecv(i-1) end do nstrings0=ndrecv(npes) allocate(labelijk0(nstrings0)) if(npes==1) then do i=1,nstrings0 labelijk0(i)=labelijk(i) end do else call my_gatherv8(labelijk,nstrings,labelijk0,nstrings0,nrecv,ndrecv,npes) end if !------ sort strings so strings with same labels are adjacent, then assign adjacent strings to same pe. !------ then when we assemble all pieces, it is guaranteed that all pieces of every contiguous string !------ will end up on the same processor. if(mype==0) then allocate(index(nstrings0)) call indexxi8(nstrings0,labelijk0,index) lastlabel=-huge(lastlabel) istring_pe=0 do i=1,nstrings0 j=index(i) if(labelijk0(j)/=lastlabel) then lastlabel=labelijk0(j) istring_pe=mod(istring_pe+1,npes) end if labelijk0(j)=istring_pe end do deallocate(index) end if !---- now scatter pe destination numbers back if(npes==1) then do i=1,nstrings0 labelijk(i)=labelijk0(i) end do else call my_scatterv8(labelijk0,nstrings0,labelijk,nstrings,nrecv,ndrecv,npes) end if deallocate(labelijk0) !---- assign destination pe numbers and count up number of points at each destination pe nrecv=0 if(nstrings>0) then do i=1,nstrings mpe=labelijk(i) nrecv(mpe)=nrecv(mpe)+i2filter(4,i) label_string2(5,i)=mpe end do !---- reorder label_string in order of increasing pe number allocate(index(nstrings)) call indexxi8(nstrings,labelijk,index) do i=1,nstrings do j=1,6 label_string(j,i)=label_string2(j,index(i)) end do do j=1,3 i1filter2(j,i)=i1filter(j,index(i)) end do do j=1,5 i2filter2(j,i)=i2filter(j,index(i)) end do end do do i=1,nstrings do j=1,3 i1filter(j,i)=i1filter2(j,i) end do do j=1,5 i2filter(j,i)=i2filter2(j,i) end do end do deallocate(index) end if if(npes==1) then npoints_recv=nrecv else call mpi_allreduce(nrecv,npoints_recv,npes,mpi_integer4,mpi_sum,mpi_comm_world,ierr) end if end subroutine string_label subroutine my_gatherv8(local,nlocal,global,nglobal,nrecv,ndrecv,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: my_gatherv8 ! prgmmr: ! ! abstract: workaround for possible problem with mpi_gatherv failure when nlocal = 0 for some processors ! ! program history log: ! 2009-08-27 lueken - added subprogram doc block ! ! input argument list: ! nlocal ! npes ! nglobal ! local ! global ! nrecv ! ndrecv ! ! output argument list: ! global ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none integer(i_long) ,intent(in ) :: nlocal,npes,nglobal integer(i_llong),intent(in ) :: local(max(1,nlocal)) integer(i_llong),intent(inout) :: global(max(1,nglobal)) integer(i_long) ,intent(in ) :: nrecv(0:npes-1),ndrecv(0:npes) integer(i_long) nrecv1(0:npes-1),ndrecv1(0:npes) integer(i_long) i,n,nlocal1,ierr integer(i_llong) local1(max(1,nlocal)) integer(i_llong) global1(nglobal+npes+1) do i=0,npes-1 nrecv1(i)=max(nrecv(i),1) end do ndrecv1(0)=0 do i=1,npes ndrecv1(i)=ndrecv1(i-1)+nrecv1(i-1) end do local1(1)=0 if(nlocal>0) then do i=1,nlocal local1(i)=local(i) end do end if nlocal1=max(1,nlocal) call mpi_gatherv(local1,nlocal1,mpi_integer8,global1,nrecv1,ndrecv1,mpi_integer8,0,mpi_comm_world,ierr) do n=0,npes-1 if(nrecv(n)>0) then do i=1,nrecv(n) global(i+ndrecv(n))=global1(i+ndrecv1(n)) end do end if end do end subroutine my_gatherv8 subroutine my_scatterv8(global,nglobal,local,nlocal,nrecv,ndrecv,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: my_scatterv8 ! prgmmr: ! ! abstract: workaround for possible problem with mpi_scatterv failure when nlocal = 0 for some processors ! ! program history log: ! 2009-08-27 lueken - added subprogram doc block ! ! input argument list: ! nlocal ! npes ! global ! nrecv ! ndrecv ! local ! nglobal ! ! output argument list: ! local ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none integer(i_long) ,intent(in ) :: nlocal,npes,nglobal integer(i_llong),intent(inout) :: local(max(1,nlocal)) integer(i_llong),intent(in ) :: global(max(1,nglobal)) integer(i_long) ,intent(in ) :: nrecv(0:npes-1),ndrecv(0:npes) integer(i_long) nrecv1(0:npes-1),ndrecv1(0:npes) integer(i_long) i,n,nlocal1,ierr integer(i_llong) local1(max(1,nlocal)) integer(i_llong) global1(nglobal+npes+1) do i=0,npes-1 nrecv1(i)=max(nrecv(i),1) end do ndrecv1(0)=0 do i=1,npes ndrecv1(i)=ndrecv1(i-1)+nrecv1(i-1) end do do n=0,npes-1 if(nrecv(n)>0) then do i=1,nrecv(n) global1(i+ndrecv1(n))=global(i+ndrecv(n)) end do else global1(1+ndrecv1(n))=0 end if end do nlocal1=max(1,nlocal) call mpi_scatterv(global1,nrecv1,ndrecv1,mpi_integer8,local1,nlocal1,mpi_integer8,0,mpi_comm_world,ierr) if(nlocal>0) then do i=1,nlocal local(i)=local1(i) end do else local(1)=0 end if end subroutine my_scatterv8 subroutine what_color_is(i1,i2,i3,color) !$$$ subprogram documentation block ! . . . ! subprogram: what_color_is ! ! prgrmmr: ! ! abstract: ! ! program history log: ! 2008-03-25 safford -- add subprogram doc block, rm unused uses ! ! input argument list: ! i1, i2, i3 - ! ! output argument list: ! color - ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block implicit none integer(i_long),intent(IN ) :: i1,i2,i3 integer(i_long),intent( OUT) :: color integer(i_long),dimension(3):: v,vh,vh2,b124 logical same integer(i_long):: i,itest data b124/1,2,4/ !---------------------------------------------------------------- vh(1)=i1; vh(2)=i2; vh(3)=i3 do itest=1,20 v=vh; vh=v/2; vh2=vh*2 ! if(.NOT.same(vh2,v,3))exit same=.true. ; do i=1,3; if(vh2(i)/=v(i)) same=.false. ; enddo if(.not.same) exit enddo v=modulo(v,2) color=dot_product(v,b124) end subroutine what_color_is subroutine add_halox0(filter,nrows,ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: add_halox0 ! prgmmr: ! ! abstract: setup everything needed for adding halo rows when required ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! nrows ! ids,ide,jds,jde - domain indices ! ips,ipe,jps,jpe - patch indices ! mype - mpi task id ! npes ! filter ! ! output argument list: ! filter ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none TYPE(filter_cons),INTENT(inOUT) :: filter integer(i_long) ,intent(in ) :: nrows integer(i_long) ,intent(in ) :: ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes integer(i_long) ijglob_pe(ids:ide,jds:jde),ijglob_pe0(ids:ide,jds:jde) integer(i_long) nrecv_halo(0:npes-1),ndrecv_halo(0:npes) integer(i_long) nsend_halo(0:npes-1),ndsend_halo(0:npes) integer(i_long) i,i0,ii,ir,istat,j,mpe integer(i_long) info_recv_halo(2,npes*2*nrows*(ide-ids+1)) integer(i_long) info_send_halo(2,npes*2*nrows*(ide-ids+1)) integer(i_long) iorigin(npes*2*nrows*(ide-ids+1)) integer(i_long) indx(npes*2*nrows*(ide-ids+1)) integer(i_long) iwork(npes*2*nrows*(ide-ids+1)) integer(i_long) nrecv_halo_loc,nsend_halo_loc,mpi_string1,ierror if(npes==1.or.nrows==0) return if(ips==ids.and.ipe==ide) return ijglob_pe0=0 do j=jps,jpe do i=ips,ipe ijglob_pe0(i,j)=mype end do end do call mpi_allreduce(ijglob_pe0,ijglob_pe,(ide-ids+1)*(jde-jds+1),mpi_integer4,mpi_sum,mpi_comm_world,ierror) ! create list of all points to be recieved ii=0 nrecv_halo=0 do i0=ips-nrows,ipe+1,ipe+1-ips+nrows do ir=0,nrows-1 i=i0+ir if(iide) cycle do j=jps,jpe ii=ii+1 info_recv_halo(1,ii)=i ; info_recv_halo(2,ii)=j iorigin(ii)=ijglob_pe(i,j) nrecv_halo(ijglob_pe(i,j))=nrecv_halo(ijglob_pe(i,j))+1 end do end do end do ndrecv_halo(0)=0 do mpe=1,npes ndrecv_halo(mpe)=ndrecv_halo(mpe-1)+nrecv_halo(mpe-1) end do call mpi_alltoall(nrecv_halo,1,mpi_integer4,nsend_halo,1,mpi_integer4,mpi_comm_world,ierror) ndsend_halo(0)=0 do mpe=1,npes ndsend_halo(mpe)=ndsend_halo(mpe-1)+nsend_halo(mpe-1) end do nsend_halo_loc=ndsend_halo(npes) nrecv_halo_loc=ndrecv_halo(npes) ! sort origin pe numbers from smallest to largest if(ii>0) then call indexxi4(ii,iorigin,indx) ! use sort index to reorder do j=1,2 do i=1,ii iwork(i)=info_recv_halo(j,indx(i)) end do do i=1,ii info_recv_halo(j,i)=iwork(i) end do end do end if call mpi_type_contiguous(2_i_long,mpi_integer4,mpi_string1,ierror) call mpi_type_commit(mpi_string1,ierror) call mpi_alltoallv(info_recv_halo,nrecv_halo,ndrecv_halo,mpi_string1, & info_send_halo,nsend_halo,ndsend_halo,mpi_string1,mpi_comm_world,ierror) call mpi_type_free(mpi_string1,ierror) filter%nsend_halox_loc=nsend_halo_loc filter%nrecv_halox_loc=nrecv_halo_loc deallocate(filter%nrecv_halox,stat=istat) allocate(filter%nrecv_halox(0:npes-1)) do i=0,npes-1 filter%nrecv_halox(i)=nrecv_halo(i) end do deallocate(filter%ndrecv_halox,stat=istat) allocate(filter%ndrecv_halox(0:npes)) do i=0,npes filter%ndrecv_halox(i)=ndrecv_halo(i) end do deallocate(filter%nsend_halox,stat=istat) allocate(filter%nsend_halox(0:npes-1)) do i=0,npes-1 filter%nsend_halox(i)=nsend_halo(i) end do deallocate(filter%ndsend_halox,stat=istat) allocate(filter%ndsend_halox(0:npes)) do i=0,npes filter%ndsend_halox(i)=ndsend_halo(i) end do deallocate(filter%info_send_halox,stat=istat) allocate(filter%info_send_halox(2,nsend_halo_loc)) do i=1,nsend_halo_loc do j=1,2 filter%info_send_halox(j,i)=info_send_halo(j,i) end do end do deallocate(filter%info_recv_halox,stat=istat) allocate(filter%info_recv_halox(2,nrecv_halo_loc)) do i=1,nrecv_halo_loc do j=1,2 filter%info_recv_halox(j,i)=info_recv_halo(j,i) end do end do end subroutine add_halox0 subroutine add_haloy0(filter,nrows,ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: add_haloy0 ! prgmmr: ! ! abstract: setup everything needed for adding halo rows when required ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! nrows ! ids,ide,jds,jde - domain indices ! ips,ipe,jps,jpe - patch indices ! mype - mpi task id ! npes ! filter ! ! output argument list: ! filter ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none TYPE(filter_cons),INTENT(inout) :: filter integer(i_long) ,intent(in ) :: nrows integer(i_long) ,intent(in ) :: ids,ide,jds,jde,ips,ipe,jps,jpe,mype,npes integer(i_long) ijglob_pe(ids:ide,jds:jde),ijglob_pe0(ids:ide,jds:jde) integer(i_long) nrecv_halo(0:npes-1),ndrecv_halo(0:npes) integer(i_long) nsend_halo(0:npes-1),ndsend_halo(0:npes) integer(i_long) i,ii,istat,j,j0,jr,mpe integer(i_long) info_recv_halo(2,npes*2*nrows*(jde-jds+1)) integer(i_long) info_send_halo(2,npes*2*nrows*(jde-jds+1)) integer(i_long) iorigin(npes*2*nrows*(jde-jds+1)) integer(i_long) indx(npes*2*nrows*(jde-jds+1)) integer(i_long) iwork(npes*2*nrows*(jde-jds+1)) integer(i_long) nrecv_halo_loc,nsend_halo_loc,mpi_string1,ierror if(npes==1.or.nrows==0) return if(jps==jds.and.jpe==jde) return ijglob_pe0=0 do j=jps,jpe do i=ips,ipe ijglob_pe0(i,j)=mype end do end do call mpi_allreduce(ijglob_pe0,ijglob_pe,(ide-ids+1)*(jde-jds+1),mpi_integer4,mpi_sum,mpi_comm_world,ierror) ! create list of all points to be recieved ii=0 nrecv_halo=0 do j0=jps-nrows,jpe+1,jpe+1-jps+nrows do jr=0,nrows-1 j=j0+jr if(jjde) cycle do i=ips,ipe ii=ii+1 info_recv_halo(1,ii)=i ; info_recv_halo(2,ii)=j iorigin(ii)=ijglob_pe(i,j) nrecv_halo(ijglob_pe(i,j))=nrecv_halo(ijglob_pe(i,j))+1 end do end do end do ndrecv_halo(0)=0 do mpe=1,npes ndrecv_halo(mpe)=ndrecv_halo(mpe-1)+nrecv_halo(mpe-1) end do call mpi_alltoall(nrecv_halo,1,mpi_integer4,nsend_halo,1,mpi_integer4,mpi_comm_world,ierror) ndsend_halo(0)=0 do mpe=1,npes ndsend_halo(mpe)=ndsend_halo(mpe-1)+nsend_halo(mpe-1) end do nsend_halo_loc=ndsend_halo(npes) nrecv_halo_loc=ndrecv_halo(npes) ! sort origin pe numbers from smallest to largest if(ii>0) then call indexxi4(ii,iorigin,indx) ! use sort index to reorder do j=1,2 do i=1,ii iwork(i)=info_recv_halo(j,indx(i)) end do do i=1,ii info_recv_halo(j,i)=iwork(i) end do end do end if call mpi_type_contiguous(2_i_long,mpi_integer4,mpi_string1,ierror) call mpi_type_commit(mpi_string1,ierror) call mpi_alltoallv(info_recv_halo,nrecv_halo,ndrecv_halo,mpi_string1, & info_send_halo,nsend_halo,ndsend_halo,mpi_string1,mpi_comm_world,ierror) call mpi_type_free(mpi_string1,ierror) filter%nsend_haloy_loc=nsend_halo_loc filter%nrecv_haloy_loc=nrecv_halo_loc deallocate(filter%nrecv_haloy,stat=istat) allocate(filter%nrecv_haloy(0:npes-1)) do i=0,npes-1 filter%nrecv_haloy(i)=nrecv_halo(i) end do deallocate(filter%ndrecv_haloy,stat=istat) allocate(filter%ndrecv_haloy(0:npes)) do i=0,npes filter%ndrecv_haloy(i)=ndrecv_halo(i) end do deallocate(filter%nsend_haloy,stat=istat) allocate(filter%nsend_haloy(0:npes-1)) do i=0,npes-1 filter%nsend_haloy(i)=nsend_halo(i) end do deallocate(filter%ndsend_haloy,stat=istat) allocate(filter%ndsend_haloy(0:npes)) do i=0,npes filter%ndsend_haloy(i)=ndsend_halo(i) end do deallocate(filter%info_send_haloy,stat=istat) allocate(filter%info_send_haloy(2,nsend_halo_loc)) do i=1,nsend_halo_loc do j=1,2 filter%info_send_haloy(j,i)=info_send_halo(j,i) end do end do deallocate(filter%info_recv_haloy,stat=istat) allocate(filter%info_recv_haloy(2,nrecv_halo_loc)) do i=1,nrecv_halo_loc do j=1,2 filter%info_recv_haloy(j,i)=info_recv_halo(j,i) end do end do end subroutine add_haloy0 subroutine add_halo_x(f,g,filter,ngauss,nrows,ids,ide,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: add_halo_x ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! filter ! ngauss,nrows ! ids,ide - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! npes ! f ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none TYPE(filter_cons),INTENT(in ) :: filter integer(i_long) ,intent(in ) :: ngauss,nrows integer(i_long) ,intent(in ) :: ids,ide,ips,ipe,jps,jpe,kps,kpe,npes real(r_single) ,intent(in ) :: f(ngauss,ips:ipe,jps:jpe,kps:kpe) real(r_single) ,intent( out) :: g(ngauss,max(ids,ips-nrows):min(ide,ipe+nrows),jps:jpe,kps:kpe) integer(i_long) i,j,k,mpi_string2,n,ierror real(r_single),allocatable:: bufsend(:,:,:),bufrecv(:,:,:) do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=f(n,i,j,k) end do end do end do end do if(npes==1.or.nrows==0.or.(ips==ids.and.ipe==ide)) return ! now gather up points to send allocate(bufsend(ngauss,kps:kpe,filter%nsend_halox_loc),bufrecv(ngauss,kps:kpe,filter%nrecv_halox_loc)) do i=1,filter%nsend_halox_loc do k=kps,kpe do n=1,ngauss bufsend(n,k,i)=f(n,filter%info_send_halox(1,i),filter%info_send_halox(2,i),k) end do end do end do call mpi_type_contiguous(ngauss*(kpe-kps+1),mpi_real4,mpi_string2,ierror) call mpi_type_commit(mpi_string2,ierror) call mpi_alltoallv(bufsend,filter%nsend_halox,filter%ndsend_halox,mpi_string2, & bufrecv,filter%nrecv_halox,filter%ndrecv_halox,mpi_string2,mpi_comm_world,ierror) call mpi_type_free(mpi_string2,ierror) ! finally distribute points back do i=1,filter%nrecv_halox_loc do k=kps,kpe do n=1,ngauss g(n,filter%info_recv_halox(1,i),filter%info_recv_halox(2,i),k)=bufrecv(n,k,i) end do end do end do deallocate(bufsend,bufrecv) end subroutine add_halo_x subroutine add_halo_y(f,g,filter,ngauss,nrows,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) !$$$ subprogram documentation block ! . . . . ! subprogram: add_halo_y ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! filter ! ngauss,nrows ! jds,jde - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! npes ! f ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none TYPE(filter_cons),INTENT(in ) :: filter integer(i_long) ,intent(in ) :: ngauss,nrows integer(i_long) ,intent(in ) :: jds,jde,ips,ipe,jps,jpe,kps,kpe,npes real(r_single) ,intent(in ) :: f(ngauss,ips:ipe,jps:jpe,kps:kpe) real(r_single) ,intent( out) :: g(ngauss,ips:ipe,max(jds,jps-nrows):min(jde,jpe+nrows),kps:kpe) integer(i_long) i,j,k,mpi_string2,n,ierror real(r_single),allocatable:: bufsend(:,:,:),bufrecv(:,:,:) do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=f(n,i,j,k) end do end do end do end do if(npes==1.or.nrows==0.or.(jps==jds.and.jpe==jde)) return ! now gather up points to send allocate(bufsend(ngauss,kps:kpe,filter%nsend_haloy_loc),bufrecv(ngauss,kps:kpe,filter%nrecv_haloy_loc)) do i=1,filter%nsend_haloy_loc do k=kps,kpe do n=1,ngauss bufsend(n,k,i)=f(n,filter%info_send_haloy(1,i),filter%info_send_haloy(2,i),k) end do end do end do call mpi_type_contiguous(ngauss*(kpe-kps+1),mpi_real4,mpi_string2,ierror) call mpi_type_commit(mpi_string2,ierror) call mpi_alltoallv(bufsend,filter%nsend_haloy,filter%ndsend_haloy,mpi_string2, & bufrecv,filter%nrecv_haloy,filter%ndrecv_haloy,mpi_string2,mpi_comm_world,ierror) call mpi_type_free(mpi_string2,ierror) ! finally distribute points back do i=1,filter%nrecv_haloy_loc do k=kps,kpe do n=1,ngauss g(n,filter%info_recv_haloy(1,i),filter%info_recv_haloy(2,i),k)=bufrecv(n,k,i) end do end do end do deallocate(bufsend,bufrecv) end subroutine add_halo_y !!!!!!!!!!!!!!!!!!!!!!!!!!! following is new code for implementation of approximate 4th order factorization !!!!!!!!!!!!!!!!!!!!!!!!!!! which yields almost a 4th order result for the cost of a 2nd order filter. !!!!!!!!!!!!!!!!!!!!!!!!!!! this is because the approximate factorization breaks the 4th order operator !!!!!!!!!!!!!!!!!!!!!!!!!!! into 4 factors, which are then recombined in such a way that the last !???????????? -- there may still be a bug in the new factorization code, because there is !???????????? bad behaviour at the boundaries for homogeneous, isotropic run. !??????????? however, in the interior, new filter matches very well with exact 4th order. subroutine one_color4_new_factorization(g,filter,ngauss,ipass,ifilt_ord, & nstrings,istart,ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) !$$$ subprogram documentation block ! . . . . ! subprogram: one_color4_new_factorization ! prgmmr: ! ! abstract: apply one forward-backward recursive filter for one color. ! use new approximate 4th order factorization ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! ips,ipe,jps,jpe,kps,kpe - patch indices ! npes ! ngauss,iadvance,iback ! ipass - total number of contiguous string points ! ifilt_ord ! nstrings ! istart ! filter ! g ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: & ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: & npes,ngauss,iadvance,iback INTEGER(i_long) ,INTENT(IN ) :: & ipass ! total number of contiguous string points integer(i_long) ,intent(in ) :: ifilt_ord real(r_single), DIMENSION( ngauss,ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field on grid, output--filtered field on grid integer(i_long) ,intent(in ) :: nstrings integer(i_long) ,intent(in ) :: istart(nstrings+1) type(filter_cons) ,intent(in ) :: filter real(r_single) work(ngauss,max(1,filter%npointsmax),2) real(r_single) work2(max(1,filter%npointsmax)) integer(i_long) i,ierr,igauss,j,l,mpi_string !-- gather up strings call mpi_type_contiguous(ngauss,mpi_real4,mpi_string,ierr) call mpi_type_commit(mpi_string,ierr) if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss work(igauss,i,1)=g(igauss,filter%ia(i),filter%ja(i),filter%ka(i)) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,i,2)=work(igauss,i,1) end do end do else call mpi_alltoallv(work(1,1,1),filter%nsend,filter%ndsend,mpi_string, & work(1,1,2),filter%nrecv,filter%ndrecv,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_recv>0) then do igauss=1,ngauss do i=1,filter%npoints_recv work2(i)=work(igauss,filter%ib(i),2) end do do j=1,nstrings do i=istart(j),istart(j+1)-1 do l=1,min(ifilt_ord,i-istart(j)) work2(i)=work2(i)-filter%fmat(l,i,ipass,igauss,iadvance)*work2(i-l) end do work2(i)=filter%fmat0(i,ipass,igauss,iadvance)*work2(i) end do do i=istart(j+1)-1,istart(j),-1 do l=1,min(ifilt_ord,istart(j+1)-i-1) work2(i)=work2(i)-filter%fmat(l,i+l,ipass,igauss,iback)*work2(i+l) end do work2(i)=filter%fmat0(i,ipass,igauss,iback)*work2(i) end do end do do i=1,filter%npoints_recv work(igauss,i,1)=work2(i) end do end do !-- send strings back do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,filter%ib(i),2)=work(igauss,i,1) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(igauss,i,1)=work(igauss,i,2) end do end do else call mpi_alltoallv(work(1,1,2),filter%nrecv,filter%ndrecv,mpi_string, & work(1,1,1),filter%nsend,filter%ndsend,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss g(igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(igauss,i,1) end do end do end if call mpi_type_free(mpi_string,ierr) end subroutine one_color4_new_factorization subroutine one_color24_new_factorization(g,filter,ngauss,ipass,ifilt_ord, & nstrings,istart,ips,ipe,jps,jpe,kps,kpe,npes,iadvance,iback) !$$$ subprogram documentation block ! . . . . ! subprogram: one_color24_new_factorization ! prgmmr: ! ! abstract: apply one forward-backward recursive filter for one color ! ! program history log: ! 2009-09-29 lueken - added subprogram doc block ! ! input argument list: ! ips,ipe,jps,jpe,kps,kpe - patch indices ! npes ! ngauss,iadvance,iback ! ipass ! ifilt_ord ! nstrings ! istart ! filter ! g ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none INTEGER(i_long) ,INTENT(IN ) :: & ips, ipe, jps, jpe, kps, kpe ! patch indices INTEGER(i_long) ,INTENT(IN ) :: & npes,ngauss,iadvance,iback INTEGER(i_long) ,INTENT(IN ) :: & ipass ! total number of contiguous string points integer(i_long) ,intent(in ) :: ifilt_ord real(r_single), DIMENSION(2,ngauss, ips:ipe, jps:jpe, kps:kpe ),INTENT(INOUT) :: & g ! input--field on grid, output--filtered field on grid integer(i_long) ,intent(in ) :: nstrings integer(i_long) ,intent(in ) :: istart(nstrings+1) type(filter_cons) ,intent(in ) :: filter real(r_single) work(2,ngauss,max(1,filter%npointsmax),2) real(r_single) work2(max(1,filter%npointsmax)) integer(i_long) i,ierr,igauss,ii,j,l,mpi_string !-- gather up strings call mpi_type_contiguous(2*ngauss,mpi_real4,mpi_string,ierr) call mpi_type_commit(mpi_string,ierr) if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss work(1,igauss,i,1)=g(1,igauss,filter%ia(i),filter%ja(i),filter%ka(i)) work(2,igauss,i,1)=g(2,igauss,filter%ia(i),filter%ja(i),filter%ka(i)) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,i,2)=work(1,igauss,i,1) work(2,igauss,i,2)=work(2,igauss,i,1) end do end do else call mpi_alltoallv(work(1,1,1,1),filter%nsend,filter%ndsend,mpi_string, & work(1,1,1,2),filter%nrecv,filter%ndrecv,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_recv>0) then do igauss=1,ngauss do ii=1,2 do i=1,filter%npoints_recv work2(i)=work(ii,igauss,filter%ib(i),2) end do do j=1,nstrings do i=istart(j),istart(j+1)-1 do l=1,min(ifilt_ord,i-istart(j)) work2(i)=work2(i)-filter%fmat(l,i,ipass,igauss,iadvance)*work2(i-l) end do work2(i)=filter%fmat0(i,ipass,igauss,iadvance)*work2(i) end do do i=istart(j+1)-1,istart(j),-1 do l=1,min(ifilt_ord,istart(j+1)-i-1) work2(i)=work2(i)-filter%fmat(l,i+l,ipass,igauss,iback)*work2(i+l) end do work2(i)=filter%fmat0(i,ipass,igauss,iback)*work2(i) end do end do do i=1,filter%npoints_recv work(ii,igauss,i,1)=work2(i) end do end do end do !-- send strings back do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,filter%ib(i),2)=work(1,igauss,i,1) work(2,igauss,filter%ib(i),2)=work(2,igauss,i,1) end do end do end if if(npes==1.and.filter%npoints_recv>0) then do i=1,filter%npoints_recv do igauss=1,ngauss work(1,igauss,i,1)=work(1,igauss,i,2) work(2,igauss,i,1)=work(2,igauss,i,2) end do end do else call mpi_alltoallv(work(1,1,1,2),filter%nrecv,filter%ndrecv,mpi_string, & work(1,1,1,1),filter%nsend,filter%ndsend,mpi_string,mpi_comm_world,ierr) end if if(filter%npoints_send>0) then do i=1,filter%npoints_send do igauss=1,ngauss g(1,igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(1,igauss,i,1) g(2,igauss,filter%ia(i),filter%ja(i),filter%ka(i))=work(2,igauss,i,1) end do end do end if call mpi_type_free(mpi_string,ierr) end subroutine one_color24_new_factorization subroutine new_alpha_beta4(info_string,aspect_full,rgauss,fmat,fmat0,igauss,ngauss, & istart_out,npoints_mype,binomial,npass, & lenbar,lenmax,lenmin,npoints1,nvars) !$$$ subprogram documentation block ! . . . . ! subprogram: new_alpha_beta4 ! prgmmr: ! ! abstract: compute recursion constants alpha and beta along unbroken strings ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! nvars ! npoints_mype ! npass ! binomial ! info_string ! igauss,ngauss ! rgauss ! fmat,fmat0 ! ! output argument list: ! fmat,fmat0 ! istart_out ! lenmax,lenmin,npoints1 ! lenbar ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block IMPLICIT NONE INTEGER(i_long) ,INTENT(IN ) :: nvars INTEGER(i_long) ,INTENT(IN ) :: npoints_mype,npass REAL(r_double) , DIMENSION( 10, 10 ) ,INTENT(IN ) :: binomial INTEGER(i_short), DIMENSION( 8, npoints_mype ),INTENT(IN ) :: & info_string ! 1---- distance from origin to current point ! 2,3,4-- origin coordinates ! 5,6,7,8-- jumpx,jumpy,jumpz,ivar for this string real(r_single), DIMENSION( npoints_mype ) ,INTENT(IN ) :: & aspect_full integer(i_long) ,intent(in ) :: igauss,ngauss real(r_double) ,intent(in ) :: rgauss real(r_single) ,intent(inout) :: & fmat(2,npoints_mype,npass,ngauss,2),fmat0(npoints_mype,npass,ngauss,2) integer(i_long) ,intent( out) :: istart_out(*) integer(i_long) ,intent( out) :: lenmax(nvars),lenmin(nvars),npoints1(nvars) ! diagnostic output--to look at string real(r_double) ,intent( out) :: lenbar(nvars) integer(i_long) i,iend,ipass,istart,ivar integer(i_long) nstrings nstrings=0 istart=1 if(npoints_mype>1) then do i=2,npoints_mype if(info_string(1,i)/=info_string(1,i-1)+1.or. & info_string(2,i)/=info_string(2,i-1).or. & info_string(3,i)/=info_string(3,i-1).or. & info_string(4,i)/=info_string(4,i-1).or. & info_string(5,i)/=info_string(5,i-1).or. & info_string(6,i)/=info_string(6,i-1).or. & info_string(7,i)/=info_string(7,i-1).or. & info_string(8,i)/=info_string(8,i-1)) then iend=i-1 if(igauss==1) then ivar=info_string(8,iend) lenbar(ivar)=lenbar(ivar)+(iend-istart+1) lenmax(ivar)=max(iend-istart+1,lenmax(ivar)) lenmin(ivar)=min(iend-istart+1,lenmin(ivar)) if(iend==istart) npoints1(ivar)=npoints1(ivar)+1 end if nstrings=nstrings+1 istart_out(nstrings)=istart istart_out(nstrings+1)=iend+1 do ipass=1,npass call new_alpha_betaa4(aspect_full(istart),rgauss,iend-istart+1,binomial(ipass,npass), & fmat(1,istart,ipass,igauss,1),fmat0(istart,ipass,igauss,1), & fmat(1,istart,ipass,igauss,2),fmat0(istart,ipass,igauss,2)) end do istart=iend+1 end if end do end if iend=npoints_mype if(igauss==1) then ivar=info_string(8,iend) lenbar(ivar)=lenbar(ivar)+(iend-istart+1) lenmax(ivar)=max(iend-istart+1,lenmax(ivar)) lenmin(ivar)=min(iend-istart+1,lenmin(ivar)) if(iend==istart) npoints1(ivar)=npoints1(ivar)+1 end if nstrings=nstrings+1 istart_out(nstrings)=istart istart_out(nstrings+1)=iend+1 do ipass=1,npass call new_alpha_betaa4(aspect_full(istart),rgauss,iend-istart+1,binomial(ipass,npass), & fmat(1,istart,ipass,igauss,1),fmat0(istart,ipass,igauss,1), & fmat(1,istart,ipass,igauss,2),fmat0(istart,ipass,igauss,2)) end do end subroutine new_alpha_beta4 subroutine new_alpha_betaa4(aspect,rgauss,ng,binomial,fmat1,fmat01,fmat2,fmat02) !$$$ subprogram documentation block ! . . . . ! subprogram: new_alpha_betaa4 ! prgmmr: ! ! abstract: compute constants for special Purser 4th order factorization ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! aspect: correlation scale (squared, i think), grid units ! rgauss: multiplier of aspect, for multiple gaussians--used for fat-tail filter ! ng: length of string ! binomial: weighting factors (perhaps not needed with high-order filter) ! ! output argument list: ! fmat1,fmat01,fmat2,fmat02 : filter parameters for special factorization ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block IMPLICIT NONE INTEGER(i_long) ,INTENT(IN ) :: ng REAL(r_double) ,INTENT(IN ) :: binomial real(r_single), DIMENSION( ng ),INTENT(IN ) :: aspect real(r_double) ,intent(in ) :: rgauss real(r_single) ,intent( out) :: fmat1(2,ng),fmat01(ng) real(r_single) ,intent( out) :: fmat2(2,ng),fmat02(ng) real(r_double) sig(0:ng-1) real(r_double) fmat(0:ng-1,-2:0,2) integer(i_long) i do i=1,ng sig(i-1)=sqrt(rgauss*aspect(i)*binomial) end do call stringop(ng-1,sig,fmat) do i=1,ng fmat1(2,i)=fmat(i-1,-2,1) fmat1(1,i)=fmat(i-1,-1,1) fmat01(i)=one/fmat(i-1,0,1) fmat2(2,i)=fmat(i-1,-2,2) fmat2(1,i)=fmat(i-1,-1,2) fmat02(i)=one/fmat(i-1,0,2) end do end subroutine new_alpha_betaa4 !============================================================================= subroutine stringop(n,sig,fmat) !============================================================================= !$$$ subprogram documentation block ! . . . . ! subprogram: stringop ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! n ! sig ! ! output argument list: ! fmat ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block implicit none integer(i_long) ,intent(IN ) :: n real(r_double),dimension(0:n) ,intent(IN ) :: sig real(r_double),dimension(0:n,-2:0,2),intent( OUT) :: fmat !---------------------------------------------------------------------------- complex(r_double),dimension(2) :: rts real(r_double), dimension(0:n,-1:1) :: dmat real(r_double) :: dmatt integer(i_long) :: i,im data rts/ & (-3.4588884621354108_r_double, 1.7779487522437312_r_double), & (-0.54111153786458903_r_double, 5.0095518087248694_r_double)/ !============================================================================= dmat=0 do i=1,n im=i-1 dmatt=sig(im)*sig(i) dmat(im,1)=-dmatt dmat(i,-1)=-dmatt dmat(im,0)= dmat(im,0)+dmatt dmat(i,0 )= dmat(i,0 )+dmatt enddo do i=1,2 call quadfil(n,dmat,rts(i),fmat(:,:,i)) enddo end subroutine stringop !============================================================================= subroutine quadfil(n,dmat,rts,fmat) !============================================================================= !$$$ subprogram documentation block ! . . . . ! subprogram: quadfil ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! n ! dmat ! rts ! ! output argument list: ! fmat ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block use module_pmat2,only: mulbb,l1lb implicit none integer(i_long) ,intent(IN ) :: n real(r_double),dimension(0:n,-1:1),intent(IN ) :: dmat complex(r_double) ,intent(IN ) :: rts real(r_double),dimension(0:n,-2:0),intent( OUT) :: fmat !----------------------------------------------------------------------------- real(r_double),dimension(0:n,-1:1) :: tmat real(r_double),dimension(0:n,-2:2) :: umat real(r_double) :: a1,a2,rrtsi,qrtsi complex(r_double) :: rtsi integer(i_long) :: np !============================================================================= np=n+1 rtsi=one/rts rrtsi=real(rtsi) qrtsi=imag(rtsi) a1=-2*rrtsi a2=rrtsi**2+qrtsi**2 tmat(0:n,-1:1)=a2*dmat tmat(0:n,0)=tmat(0:n,0)+a1 call mulbb(dmat,tmat(0:n,-1:1),umat(0:n,-2:2), np,np, 1,1, 1,1, 2,2) umat(0:n,0)=umat(0:n,0)+one if(n==zero) then fmat=zero fmat(0,0)=umat(0,0) else call l1lb(umat,fmat,np,2) end if end subroutine quadfil !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! end of added code for new factorization end module raflib SUBROUTINE EIGEN(A,R,N,MV) !$$$ subprogram documentation block ! . . . ! subprogram: EIGEN ! ! prgrmmr: R. J. Purser, NCEP 2005 ! ! abstract: COMPUTE EIGENVALUES AND EIGENVECTORS OF A REAL SYMMETRIC ! MATRIX ! ! REMARKS ! ORIGINAL MATRIX A MUST BE REAL SYMMETRIC (STORAGE MODE=1) ! MATRIX A CANNOT BE IN THE SAME LOCATION AS MATRIX R ! ! SUBROUTINES AND FUNCTION SUBPROGRAMS REQUIRED ! NONE ! ! METHOD ! DIAGONALIZATION METHOD ORIGINATED BY JACOBI AND ADAPTED ! BY VON NEUMANN FOR LARGE COMPUTERS AS FOUND IN 'MATHEMATICAL ! METHODS FOR DIGITAL COMPUTERS', EDITED BY A. RALSTON AND ! H.S. WILF, JOHN WILEY AND SONS, NEW YORK, 1962, CHAPTER 7 ! ! program history log: ! 2008-04-22 safford -- add subprogram doc block ! ! input argument list: ! A - ORIGINAL MATRIX (SYMMETRIC), DESTROYED IN COMPUTATION. ! RESULTANT EIGENVALUES ARE DEVELOPED IN DIAGONAL OF ! MATRIX A IN DESCENDING ORDER. ! R - RESULTANT MATRIX OF EIGENVECTORS (STORED COLUMNWISE, ! IN SAME SEQUENCE AS EIGENVALUES) ! N - ORDER OF MATRICES A AND R ! MV- INPUT CODE ! 0 COMPUTE EIGENVALUES AND EIGENVECTORS ! 1 COMPUTE EIGENVALUES ONLY (R NEED NOT BE ! DIMENSIONED BUT MUST STILL APPEAR IN CALLING SEQUENCE) ! ! output argument list: ! A - ORIGINAL MATRIX (SYMMETRIC), DESTROYED IN COMPUTATION. ! RESULTANT EIGENVALUES ARE DEVELOPED IN DIAGONAL OF ! MATRIX A IN DESCENDING ORDER. ! R - RESULTANT MATRIX OF EIGENVECTORS (STORED COLUMNWISE, ! IN SAME SEQUENCE AS EIGENVALUES) ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block use kinds, only: r_kind,i_kind use constants, only: zero,half,one,two implicit none ! REAL(4) A(1),R(1) ! ! ............................................................... ! ! IF A DOUBLE PRECISION VERSION OF THIS ROUTINE IS DESIRED, THE ! C IN COLUMN 1 SHOULD BE REMOVED FROM THE DOUBLE PRECISION ! STATEMENT WHICH FOLLOWS. ! real(r_kind) ,intent(inout) :: A(*), R(*) integer(i_kind),intent(in ) :: N, MV REAL(r_kind) ANORM,ANRMX,THR,X,Y,SINX,SINX2,COSX, & COSX2,SINCS,RANGE,ONEMYY integer(i_kind) I,J,K,IA,IQ,IJ,IL,IM,ILR,IMR,IND,L,M,MQ,LQ,LM, & JQ,MM,LL,ILQ,IMQ ! ! THE C MUST ALSO BE REMOVED FROM DOUBLE PRECISION STATEMENTS ! APPEARING IN OTHER ROUTINES USED IN CONJUNCTION WITH THIS ! ROUTINE. ! ! THE DOUBLE PRECISION VERSION OF THIS SUBROUTINE MUST ALSO ! CONTAIN DOUBLE PRECISION FORTRAN FUNCTIONS. SQRT IN STATEMENTS ! 40, 68, 75, AND 78 MUST BE CHANGED TO DSQRT. ABS IN STATEMENT ! 62 MUST BE CHANGED TO DABS. THE CONSTANT IN STATEMENT 5 SHOULD ! BE CHANGED TO 1.0D-12. ! ! ............................................................... ! ! GENERATE IDENTITY MATRIX ! RANGE=1.0E-12_r_kind if(mv/=1) then IQ=-N DO J=1,N IQ=IQ+N DO I=1,N IJ=IQ+I if(i==j) then R(IJ)=one else R(IJ)=zero end if 20 CONTINUE end do end do end if ! ! COMPUTE INITIAL AND FINAL NORMS (ANORM AND ANORMX) ! ANORM=zero DO I=1,N DO J=I,N if(i/=j) then IA=I+(J*J-J)/2 ANORM=ANORM+A(IA)*A(IA) end if end do end do if(anorm>zero) then ANORM=1.414_r_kind*SQRT(ANORM) ANRMX=ANORM*RANGE/FLOAT(N) ! ! INITIALIZE INDICATORS AND COMPUTE THRESHOLD, THR ! IND=0 THR=ANORM loop1: do THR=THR/FLOAT(N) loop2: do L=1 loop3: do M=L+1 ! ! COMPUTE SIN AND COS ! loop4: do MQ=(M*M-M)/2 LQ=(L*L-L)/2 LM=L+MQ ! 62 continue if(abs(a(lm))-thr>=zero) then IND=1 LL=L+LQ MM=M+MQ X=half*(A(LL)-A(MM)) Y=-A(LM)/ SQRT(A(LM)*A(LM)+X*X) if(x0.and..not.adjoint) then do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=gnormx(i)*g(n,i,j,k) end do end do end do end do end if call add_halo_x(g,gt,filter,ngauss,nrows,ids,ide,ips,ipe,jps,jpe,kps,kpe,npes) if(nsmooth>0) & call smther_one_x(gt,g,ngauss,ids,ide,ips,ipe,jps,jpe,kps,kpe) if(nsmooth_shapiro>0) & call smther_two_x(gt,g,ngauss,ids,ide,ips,ipe,jps,jpe,kps,kpe) deallocate(gt) if(nsmooth_shapiro>0.and.adjoint) then do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=gnormx(i)*g(n,i,j,k) end do end do end do end do end if ! smoothing in y direction allocate(gt(ngauss,ips:ipe,max(jds,jps-nrows):min(jde,jpe+nrows),kps:kpe)) if(nsmooth_shapiro>0.and..not.adjoint) then do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=gnormy(j)*g(n,i,j,k) end do end do end do end do end if call add_halo_y(g,gt,filter,ngauss,nrows,jds,jde,ips,ipe,jps,jpe,kps,kpe,npes) if(nsmooth>0) & call smther_one_y(gt,g,ngauss,jds,jde,ips,ipe,jps,jpe,kps,kpe) if(nsmooth_shapiro>0) & call smther_two_y(gt,g,ngauss,jds,jde,ips,ipe,jps,jpe,kps,kpe) deallocate(gt) if(nsmooth_shapiro>0.and.adjoint) then do k=kps,kpe do j=jps,jpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=gnormy(j)*g(n,i,j,k) end do end do end do end do end if end subroutine smther subroutine smther_one_x(gx,g,ngauss,ids,ide,ips,ipe,jps,jpe,kps,kpe) !$$$ subprogram documentation block ! . . . . ! subprogram: smther_one_x ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! ngauss ! ids,ide - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! gx ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block use kinds,only: r_single,i_long implicit none integer(i_long),intent(in ) :: ngauss integer(i_long),intent(in ) :: ids,ide,ips,ipe,jps,jpe,kps,kpe real(r_single) ,intent(in ) :: gx(ngauss,max(ids,ips-1):min(ide,ipe+1),jps:jpe,kps:kpe) real(r_single) ,intent( out) :: g(ngauss,ips:ipe,jps:jpe,kps:kpe) integer(i_long) i,im,ip,j,k,n do k=kps,kpe do j=jps,jpe do i=ips,ipe ip=min(i+1,ide) ; im=max(ids,i-1) do n=1,ngauss g(n,i,j,k)=.25_r_single*(gx(n,ip,j,k)+gx(n,im,j,k)) +.5_r_single*gx(n,i,j,k) end do end do end do end do end subroutine smther_one_x subroutine smther_one_y(gy,g,ngauss,jds,jde,ips,ipe,jps,jpe,kps,kpe) !$$$ subprogram documentation block ! . . . . ! subprogram: smther_one_y ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! ngauss ! jds,jde - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! gy ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block use kinds,only: r_single,i_long implicit none integer(i_long),intent(in ) :: ngauss integer(i_long),intent(in ) :: jds,jde,ips,ipe,jps,jpe,kps,kpe real(r_single) ,intent(in ) :: gy(ngauss,ips:ipe,max(jds,jps-1):min(jde,jpe+1),kps:kpe) real(r_single) ,intent( out) :: g(ngauss,ips:ipe,jps:jpe,kps:kpe) integer(i_long) i,j,jm,jp,k,n do k=kps,kpe do j=jps,jpe jp=min(j+1,jde) ; jm=max(jds,j-1) do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.25_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k)) +.5_r_single*gy(n,i,j,k) end do end do end do end do end subroutine smther_one_y subroutine smther_two_x(gx,g,ngauss,ids,ide,ips,ipe,jps,jpe,kps,kpe) !$$$ subprogram documentation block ! . . . . ! subprogram: smther_two ! prgmmr: pondeca org: np22 date: 2006-08-01 ! ! abstract: apply second moment preserving "shapiro smoother" ! in each direction of data slab ! ! program history log: ! 2006-08-01 pondeca ! ! input argument list: ! ngauss ! ids,ide - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! gx ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block use kinds,only: r_single,i_long implicit none integer(i_long),intent(in ) :: ngauss integer(i_long),intent(in ) :: ids,ide,ips,ipe,jps,jpe,kps,kpe real(r_single) ,intent(in ) :: gx(ngauss,max(ids,ips-3):min(ide,ipe+3),jps:jpe,kps:kpe) real(r_single) ,intent( out) :: g(ngauss,ips:ipe,jps:jpe,kps:kpe) ! on input: original data slab ! on ouput: filtered data slab integer(i_long) i,im,im3,ip,ip3,istart,iend,j,k,n istart=ips ; iend=ipe if(ips==ids) then i=ids ; ip=i+1 ; ip3=i+3_i_long do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*gx(n,ip,j,k)-.03125_r_single*gx(n,ip3,j,k) end do end do end do i=ids+1 ; ip=i+1 ; ip3=i+3_i_long ; im=i-1 do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*(gx(n,ip,j,k)+gx(n,im,j,k))-.03125_r_single*gx(n,ip3,j,k) end do end do end do i=ids+2_i_long ; ip=i+1 ; ip3=i+3_i_long ; im=i-1 do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*(gx(n,ip,j,k)+gx(n,im,j,k))-.03125_r_single*gx(n,ip3,j,k) end do end do end do istart=ids+3_i_long end if if(ipe==ide) then i=ide-2_i_long ; ip=i+1 ; im3=i-3_i_long ; im=i-1 do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*(gx(n,ip,j,k)+gx(n,im,j,k))-.03125_r_single*gx(n,im3,j,k) end do end do end do i=ide-1 ; ip=i+1 ; im3=i-3_i_long ; im=i-1 do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*(gx(n,ip,j,k)+gx(n,im,j,k))-.03125_r_single*gx(n,im3,j,k) end do end do end do i=ide ; im3=i-3_i_long ; im=i-1 do k=kps,kpe do j=jps,jpe do n=1,ngauss g(n,i,j,k)=.5_r_single*gx(n,i,j,k)+.28125_r_single*gx(n,im,j,k)-.03125_r_single*gx(n,im3,j,k) end do end do end do iend=ide-3_i_long end if do k=kps,kpe do j=jps,jpe do i=istart,iend ip=i+1 ; im=i-1 ; ip3=i+3_i_long ; im3=i-3_i_long do n=1,ngauss g(n,i,j,k)=.28125_r_single*(gx(n,ip,j,k)+gx(n,im,j,k))+.5_r_single*gx(n,i,j,k) & -.03125_r_single*(gx(n,ip3,j,k)+gx(n,im3,j,k)) end do end do end do end do end subroutine smther_two_x subroutine smther_two_y(gy,g,ngauss,jds,jde,ips,ipe,jps,jpe,kps,kpe) !$$$ subprogram documentation block ! . . . . ! subprogram: smther_two ! prgmmr: pondeca org: np22 date: 2006-08-01 ! ! abstract: apply second moment preserving "shapiro smoother" ! in each direction of data slab ! ! program history log: ! 2006-08-01 pondeca ! ! input argument list: ! ngauss ! jds,jde - domain indices ! ips,ipe,jps,jpe,kps,kpe - patch indices ! gy ! ! output argument list: ! g ! ! attributes: ! language: f90 ! machine: ibm RS/6000 SP ! !$$$ end documentation block use kinds,only: r_single,i_long implicit none integer(i_long),intent(in ) :: ngauss integer(i_long),intent(in ) :: jds,jde,ips,ipe,jps,jpe,kps,kpe real(r_single) ,intent(in ) :: gy(ngauss,ips:ipe,max(jds,jps-3):min(jde,jpe+3),kps:kpe) real(r_single) ,intent( out) :: g(ngauss,ips:ipe,jps:jpe,kps:kpe) ! on input: original data slab ! on ouput: filtered data slab integer(i_long) i,jm,jm3,jp,jp3,jstart,jend,j,k,n jstart=jps ; jend=jpe if(jps==jds) then j=jds ; jp=j+1 ; jp3=j+3_i_long do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*gy(n,i,jp,k)-.03125_r_single*gy(n,i,jp3,k) end do end do end do j=jds+1 ; jp=j+1 ; jp3=j+3_i_long ; jm=j-1 do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k))-.03125_r_single*gy(n,i,jp3,k) end do end do end do j=jds+2_i_long ; jp=j+1 ; jp3=j+3_i_long ; jm=j-1 do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k))-.03125_r_single*gy(n,i,jp3,k) end do end do end do jstart=jds+3_i_long end if if(jpe==jde) then j=jde-2_i_long ; jp=j+1 ; jm3=j-3_i_long ; jm=j-1 do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k))-.03125_r_single*gy(n,i,jm3,k) end do end do end do j=jde-1 ; jp=j+1 ; jm3=j-3_i_long ; jm=j-1 do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k))-.03125_r_single*gy(n,i,jm3,k) end do end do end do j=jde ; jm3=j-3_i_long ; jm=j-1 do k=kps,kpe do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.5_r_single*gy(n,i,j,k)+.28125_r_single*gy(n,i,jm,k)-.03125_r_single*gy(n,i,jm3,k) end do end do end do jend=jde-3_i_long end if do k=kps,kpe do j=jstart,jend jp=j+1 ; jm=j-1 ; jp3=j+3_i_long ; jm3=j-3_i_long do i=ips,ipe do n=1,ngauss g(n,i,j,k)=.28125_r_single*(gy(n,i,jp,k)+gy(n,i,jm,k))+.5_r_single*gy(n,i,j,k) & -.03125_r_single*(gy(n,i,jp3,k)+gy(n,i,jm3,k)) end do end do end do end do end subroutine smther_two_y subroutine smther_two_gnorm(gnorm,ids,ide,ips,ipe) !$$$ subprogram documentation block ! . . . . ! subprogram: smther_two_gnorm ! prgmmr: ! ! abstract: ! ! program history log: ! 2009-09-30 lueken - added subprogram doc block ! ! input argument list: ! ids,ide - domain indices ! ips,ipe - patch indices ! ! output argument list: ! gnorm ! ! attributes: ! language: f90 ! machine: ! !$$$ end documentation block use kinds,only: r_single,r_double,i_long use constants,only: three,four implicit none integer(i_long),intent(in ) :: ids,ide,ips,ipe real(r_single) ,intent( out) :: gnorm(ips:ipe) integer(i_long) i,istart,iend real(r_double) const81,const82 const81=four/three const82=32._r_double/33._r_double istart=ips ; iend=ipe if(ips==ids) then i=ids gnorm(i)=const81 ! 1./(.5+.28125-.03125) i=ids+1 gnorm(i)=const82 ! 1./(.5+.28125*2.-.03125) i=ids+2_i_long gnorm(i)=const82 ! 1./(.5+.28125*2.-.03125) istart=ids+3_i_long end if if(ipe==ide) then i=ide-2_i_long gnorm(i)=const82 ! 1./(.5+.28125*2.-.03125) i=ide-1 gnorm(i)=const82 ! 1./(.5+.28125*2.-.03125) i=ide gnorm(i)=const81 ! 1./(.5+.28125-.03125) iend=ide-3_i_long end if do i=istart,iend gnorm(i)=1._r_single ! 1./(.28125*2.+.5-.03125*2.) end do end subroutine smther_two_gnorm