! Copyright 2014 College of William and Mary ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. ! You may obtain a copy of the License at ! ! http://www.apache.org/licenses/LICENSE-2.0 ! ! Unless required by applicable law or agreed to in writing, software ! distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the License for the specific language governing permissions and ! limitations under the License. !=============================================================================== !=============================================================================== ! ELCIRC LINEAR SOLVER ROUTINES ! ! subroutine solve_jcg ! subroutine solve_jcg_qnon (non-hydrostatic) ! subroutine tridag ! !=============================================================================== !=============================================================================== subroutine solve_jcg(mnei_p1,np1,npa1,itime,moitn,mxitn,rtol,s,x,b,bc,lbc) !------------------------------------------------------------------------------- ! Jacobi Preconditioned Conjugate Gradient for elevation-like variables. !------------------------------------------------------------------------------- !#ifdef USE_MPIMODULE ! use mpi !#endif use schism_glbl, only : rkind,np,npa,wtimer,iplg,ipgl,nnp,indnd,errmsg use schism_msgp implicit none !#ifndef USE_MPIMODULE include 'mpif.h' !#endif integer,intent(in) :: mnei_p1,np1,npa1 !for dimensioning only integer,intent(in) :: itime !for outputting/debug only integer,intent(in) :: moitn,mxitn !output interval and max iterations real(rkind),intent(in) :: rtol !relative tolerance real(rkind),intent(in) :: s(0:mnei_p1,np1) !sparse matrix real(rkind),intent(inout) :: x(npa1) !eta2 -- with initial guess real(rkind),intent(in) :: b(np1) !qel real(rkind),intent(in) :: bc(npa1) !b.c. (elbc) logical,intent(in) :: lbc(npa1) !b.c. flag integer :: itn,ip,jp,j,omp_get_num_threads,omp_get_thread_num real(rkind) :: rdotrl,rdotr,rdotzl,rdotz,old_rdotz,beta,alphal,alpha,cwtmp real(rkind) :: rtol2,rdotr0 real(rkind) :: z(np1),r(np1),p(npa1),sp(np1) integer :: inz(mnei_p1,np1),nnz(np1),icolor_intf_node(np1) real(rkind) :: snz(0:mnei_p1,np1),bb(np1) !------------------------------------------------------------------------------- itn=0 !$OMP parallel default(shared) private(ip,j,jp) ! Load local storage for non-zeros of sparse matrix and include essential BCs ! Since the calcualtion is done for resident nodes only, entire row of ! the global matrix will be contained in the local row of the matrix. !$OMP workshare nnz=0 !# of non-zero entries for each node !1: not interface node btw MPI processes; 0: interface node icolor_intf_node=0 !$OMP end workshare !$OMP do do ip=1,np if(lbc(ip)) then !nnz(ip)=0: no non-zero entries besides diagonal if(bc(ip)<-9998.d0) call parallel_abort('JCG: wrong b.c.') snz(0,ip)=1 !modified sparsem x(ip)=bc(ip) bb(ip)=bc(ip) !modified rrhs (qel) else !not essential b.c. snz(0,ip)=s(0,ip) bb(ip)=b(ip) do j=1,nnp(ip) jp=indnd(j,ip) if(lbc(jp)) then if(bc(jp)<-9998.d0) call parallel_abort('JCG: wrong b.c. (2)') bb(ip)=bb(ip)-s(j,ip)*bc(jp) else nnz(ip)=nnz(ip)+1 inz(nnz(ip),ip)=jp !local node #; can be ghost snz(nnz(ip),ip)=s(j,ip) endif enddo endif enddo !i=1,np !$OMP end do ! Initialization -- assume x is initial guess !#ifdef INCLUDE_TIMING ! cwtmp=mpi_wtime() !#endif ! call exchange_p2d(x) !update x ghost values !#ifdef INCLUDE_TIMING ! wtimer(7,2)=wtimer(7,2)+mpi_wtime()-cwtmp !#endif ! Residual !$OMP single rdotrl=0 !$OMP end single !$OMP do do ip=1,np if(snz(0,ip)==0.d0) call parallel_abort('JCG: zero diagonal') sp(ip)=snz(0,ip)*x(ip)+sum(snz(1:nnz(ip),ip)*x(inz(1:nnz(ip),ip))) ! do j=1,nnz(ip) ! sp(ip)=sp(ip)+snz(j,ip)*x(inz(j,ip)) ! enddo r(ip)=bb(ip)-sp(ip) !residual if(associated(ipgl(iplg(ip))%next)) then !interface node if(ipgl(iplg(ip))%next%rank=mxitn) then !$OMP master if(myrank==0) write(33,*)'JCG did not converge in ',mxitn,' iterations' !$OMP end master exit endif !$OMP do do ip=1,np !snz(0,ip)/=0 checked z(ip)=r(ip)/snz(0,ip) !jacobi enddo !$OMP end do !#ifdef INCLUDE_TIMING ! cwtmp=mpi_wtime() !#endif ! call exchange_p2d(z) !jacobi !#ifdef INCLUDE_TIMING ! wtimer(7,2)=wtimer(7,2)+mpi_wtime()-cwtmp !#endif !$OMP single rdotzl=0.d0 !$OMP end single !!!$OMP do reduction(+: rdotzl) ! do ip=1,np ! if(associated(ipgl(iplg(ip))%next)) then !interface node ! if(ipgl(iplg(ip))%next%rank0 enddo !ip ! Block-Jacobi pre-conditioner rat_max2=-1.d0 !max. horizontal-vertical ratio threshold_rat=0.4d0 !threshold for grid aspect ratio (dz/dx) large_rat(:)=.false. do ip=1,np if(lhbc(ip)) cycle !Compute horizontal-vertical ratio dx_min2=1.d25 do j=1,nnp(ip) nd=indnd(j,ip) dist2=(xnd(ip)-xnd(nd))**2.d0+(ynd(ip)-ynd(nd))**2.d0 dx_min2=min(dx_min2,dist2) enddo !j dz_max2=-1 do k=kbp(ip),nvrt-1 dist2=(znl(k+1,ip)-znl(k,ip))**2.d0 dz_max2=max(dz_max2,dist2) enddo !k if(dx_min2<=0.d0) call parallel_abort('CG2: dx_min2<=0') tmp=dz_max2/dx_min2 rat_max2=max(rat_max2,tmp) if(sqrt(tmp)>=threshold_rat) large_rat(ip)=.true. if(large_rat(ip)) cycle !Invert matrix only for small-aspect-ratio nodes ndim=nvrt-kbp_e(ip) do k=kbp_e(ip),nvrt-1 !no F.S. kin=k-kbp_e(ip)+1 if(k/=kbp_e(ip)) alow(kin)=qmatr(k,-1,0,ip) bdia(kin)=qmatr(k,0,0,ip) cupp(kin)=qmatr(k,1,0,ip) enddo !k !RHS rrhs=0.d0 do l=1,ndim rrhs(l,l)=1.d0 enddo !l call tridag(nvrt,nvrt,ndim,ndim,alow,bdia,cupp,rrhs,soln,gam) !indice order of blockj: (row #, column #, node) blockj(kbp_e(ip):(nvrt-1),kbp_e(ip):(nvrt-1),ip)=transpose(soln(1:ndim,1:ndim)) do k=kbp_e(ip),nvrt-1 !Check symmetry do l=kbp_e(ip),nvrt-1 if(abs(blockj(k,l,ip)-blockj(l,k,ip))>1.d-4) then write(errmsg,*)'Not symmetric:',iplg(ip),k,l,blockj(k,l,ip)-blockj(l,k,ip) call parallel_abort(errmsg) endif enddo !l !write(12,*)1/qmatr(k,0,0,ip),ip,k,blockj(k,kbp_e(ip):(nvrt-1),ip) enddo !k enddo !ip=1,np #ifdef INCLUDE_TIMING cwtmp=mpi_wtime() #endif call mpi_allreduce(rat_max2,rat_max2_gb,1,rtype,MPI_MAX,comm,ierr) if(myrank==0) then write(29,'(//a,i8)') '********CG2 Solve at timestep ',itime write(29,*)'done pre-conditioner' if(rat_max2_gb>0) write(16,*)'Max. vertical/horizontal ratio and threshold=',sqrt(rat_max2_gb),threshold_rat endif !if(rat_max2_gb>0.16) call parallel_abort('JCG2: grid aspect ratio exceeded') #ifdef INCLUDE_TIMING wtimer(7,2)=wtimer(7,2)+mpi_wtime()-cwtmp #endif ! Residual qhat=0.d0 !initial guess rdotrl=0.d0 rr=0.d0 !for b.c. pts etc do ip=1,np if(lhbc(ip)) cycle do k=kbp_e(ip),nvrt-1 !no F.S. if(qmatr(k,0,0,ip)<=0.d0) call parallel_abort('JCG2: zero diagonal') rr(k,ip)=qir(k,ip) !residual since qhat=0 if(associated(ipgl(iplg(ip))%next)) then !interface node if(ipgl(iplg(ip))%next%rank=mxitn) then if(myrank==0) write(29,*)'JCG2 did not converge in ',mxitn,' iterations' exit endif zz=0.d0 !for b.c. pts do ip=1,np if(lhbc(ip)) cycle !Block or simple Jacobian do k=kbp_e(ip),nvrt-1 if(large_rat(ip)) then !simple zz(k,ip)=rr(k,ip)/qmatr(k,0,0,ip) !diagonal checked else !block do l=kbp_e(ip),nvrt-1 zz(k,ip)=zz(k,ip)+blockj(k,l,ip)*rr(l,ip) enddo !l endif !large_rat enddo !k enddo !ip rdotzl=0.d0 do ip=1,np if(lhbc(ip)) cycle if(associated(ipgl(iplg(ip))%next)) then !interface node if(ipgl(iplg(ip))%next%rank=nvrt) cycle !repeat F.S. essential b.c. (no harm) do j=0,nnp(ip) if(j==0) then nd=ip else nd=indnd(j,ip) endif if(lhbc(nd).or.k+l==nvrt) cycle !essential b.c. sp(k,ip)=sp(k,ip)+qmatr(k,l,j,ip)*pp(k+l,nd) enddo !j enddo !l if(associated(ipgl(iplg(ip))%next)) then !interface node if(ipgl(iplg(ip))%next%rank0) cycle if(lhbc(ip)) cycle if(associated(ipgl(iplg(ip))%next)) then !interface node if(ipgl(iplg(ip))%next%rank= 1') if(nc>nvec) call parallel_abort('TRIDAG: Increase # of columns') if(b(1)==0.d0) call parallel_abort('TRIDAG: b(1)=0') bet=b(1) u(1:nc,1)=r(1:nc,1)/bet ifl=0 !flag for abort (for better vectorization) do j=2,n gam(j)=c(j-1)/bet bet=b(j)-a(j)*gam(j) if(bet==0.d0) ifl=1 u(1:nc,j)=(r(1:nc,j)-a(j)*u(1:nc,j-1))/bet enddo !j if(ifl==1) call parallel_abort('TRIDAG: failed') ! Backsubstitution do j=n-1,1,-1 u(1:nc,j)=u(1:nc,j)-gam(j+1)*u(1:nc,j+1) enddo end subroutine tridag