Merge pull request #939 from fvitt/se_ic_mem_cam6_3_139
cam6_3_143: Reduce memory footprint of IC file output in SE CSLAM
fvitt authored Dec 20, 2023
2 parents 6df58c0 + 21af842 commit 536045c
Showing 4 changed files with 162 additions and 42 deletions.
62 changes: 60 additions & 2 deletions doc/ChangeLog
@@ -1,5 +1,63 @@
===============================================================

Tag name: cam6_3_143
Originator(s): fvitt
Date: 20 Dec 2023
One-line Summary: Reduce memory footprint of IC file output in SE CSLAM
Github PR URL: https://github.com/ESCOMP/CAM/pull/939

Purpose of changes (include the issue number and title text for each relevant GitHub issue):

Implement memory-friendly mapping of advected tracers from the physics grid to the dynamics grid
for output to the CAM-generated IC file when the SE CSLAM dycore is used. This resolves the
memory issue described in GitHub issue #932 (Memory issues in SE CSLAM when writing IC files).
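
A minimal standalone sketch of the idea (illustrative only, not CAM code: the
dimension values are made up, and remap_one_tracer stands in for the new
single-tracer fvm2dyn path shown in the diff below). The work arrays are sized
for one tracer and reused inside a loop over all tracers, instead of being
allocated for all ntrac tracers at once:

program per_tracer_remap_sketch
  implicit none
  integer, parameter :: r8 = selected_real_kind(12)
  integer, parameter :: nc = 3, nhc = 3, nlev = 32, ntrac = 40, np = 4
  real(r8), allocatable :: fld_fvm(:,:,:,:)   ! halo-padded buffer for ONE tracer
  real(r8), allocatable :: fld_gll(:,:,:,:)   ! GLL-grid output for ONE tracer
  integer :: m_cnst, astat

  ! One tracer's worth of workspace instead of ntrac tracers' worth.
  allocate(fld_fvm(1-nhc:nc+nhc, 1-nhc:nc+nhc, nlev, 1), stat=astat)
  if (astat /= 0) stop "allocate fld_fvm failed"
  allocate(fld_gll(np, np, nlev, 1), stat=astat)
  if (astat /= 0) stop "allocate fld_gll failed"

  do m_cnst = 1, ntrac
     fld_fvm(1:nc, 1:nc, :, 1) = real(m_cnst, r8)  ! stand-in: load tracer m_cnst
     call remap_one_tracer(fld_fvm, fld_gll)       ! stand-in: halo exchange + interpolation
     ! ... the real code writes fld_gll for tracer m_cnst to the IC file here ...
  end do

  deallocate(fld_fvm, fld_gll)

contains

  subroutine remap_one_tracer(src, dst)
    ! Placeholder for the actual ghost exchange plus tensor Lagrange interpolation.
    real(r8), intent(in)  :: src(1-nhc:, 1-nhc:, :, :)
    real(r8), intent(out) :: dst(:,:,:,:)
    dst = src(1, 1, 1, 1)
  end subroutine remap_one_tracer

end program per_tracer_remap_sketch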

Describe any changes made to build system: N/A

Describe any changes made to the namelist: N/A

List any changes to the defaults for the boundary datasets: N/A

Describe any substantial timing or memory changes: N/A

Code reviewed by: PeterHjortLauritzen, cacraigucar

List all files eliminated: N/A

List all files added and what they do: N/A

List all existing files that have been modified, and describe the changes:
M src/dynamics/se/dycore/fvm_mapping.F90
- implement a physics-to-dynamics grid mapping routine for a single tracer
  (see the interface sketch after this list)

M src/dynamics/se/dycore/fvm_mod.F90
- implement exchange buffer for a single tracer

M src/dynamics/se/stepon.F90
- remap to the dynamics grid and output to the CAM-generated IC file one tracer at a time
- add memory allocation status checks
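
For reference, a minimal sketch of the generic-interface pattern used in
fvm_mapping.F90 (assumed names, not the CAM source): the existing name fvm2dyn
becomes a generic that dispatches to the multi-field routine (as fvm2dyntn
does) or the new single-field routine (as fvm2dynt1 does) based on the
argument list, so existing call sites compile unchanged:

module remap_iface_sketch
  implicit none
  interface remap_to_gll              ! plays the role of the fvm2dyn generic
     module procedure remap_one      ! single field, like fvm2dynt1 (no num_flds)
     module procedure remap_many     ! multiple fields, like fvm2dyntn
  end interface remap_to_gll
contains
  subroutine remap_one(fld, numlev)
    real, intent(inout) :: fld(:,:,:,:)
    integer, intent(in) :: numlev
    print *, 'single-field path, numlev =', numlev
  end subroutine remap_one

  subroutine remap_many(fld, numlev, num_flds)
    real, intent(inout) :: fld(:,:,:,:)
    integer, intent(in) :: numlev, num_flds
    print *, 'multi-field path, num_flds =', num_flds
  end subroutine remap_many
end module remap_iface_sketch

program remap_iface_demo
  use remap_iface_sketch
  implicit none
  real :: f1(4,4,3,1), fn(4,4,3,5)
  call remap_to_gll(f1, 3)       ! resolves to the single-field routine
  call remap_to_gll(fn, 3, 5)    ! resolves to the multi-field routine
end program remap_iface_demo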

If there were any failures reported from running test_driver.sh on any test
platform, and checkin with these failures has been OK'd by the gatekeeper,
then copy the lines from the td.*.status files for the failed tests to the
appropriate machine below. All failed tests must be justified.

derecho/intel/aux_cam:
ERP_Ln9_Vnuopc.C96_C96_mg17.F2000climo.derecho_intel.cam-outfrq9s_mg3 (Overall: PEND) details:
ERP_Ln9_Vnuopc.f09_f09_mg17.FCSD_HCO.derecho_intel.cam-outfrq9s (Overall: FAIL) details:
- pre-existing failures

izumi/nag/aux_cam:
DAE_Vnuopc.f45_f45_mg37.FHS94.izumi_nag.cam-dae (Overall: FAIL) details:
- pre-existing failure

izumi/gnu/aux_cam: All PASS

Summarize any changes to answers: bit-for-bit unchanged

===============================================================
===============================================================

Tag name: cam6_3_142
Originator(s): jedwards, ekluzek, goldy, bstephens82, eaton
Date: 13 December 2023
@@ -12,10 +70,10 @@ Purpose of changes (include the issue number and title text for each relevant GitHub issue):
- remove the code in cam_history that was checking fincl input for the
names of zonal mean fields and writing a message to the atm log file if
the dycore was not FV. These messages are now incorrect and have been
removed.

. https://github.com/ESCOMP/CAM/issues/837 - Remove setting of CTSM fsurdat
file and LND_DOMAIN_FILE in mpas testmods
- The LND_DOMAIN_FILE is no longer needed. The fsurdat file is now
appropriately set by CTSM.

54 changes: 50 additions & 4 deletions src/dynamics/se/dycore/fvm_mapping.F90
@@ -34,6 +34,12 @@ module fvm_mapping
real(kind=r8), allocatable, dimension(:,:,:,:) :: save_overlap_area
integer , allocatable, dimension(:,:,:,:,:) :: save_overlap_idx
integer , allocatable, dimension(:,:,:,:) :: save_num_overlap

interface fvm2dyn
module procedure fvm2dynt1
module procedure fvm2dyntn
end interface fvm2dyn

contains
!
! map all mass variables from gll to fvm
@@ -220,7 +226,8 @@ subroutine phys2dyn_forcings_fvm(elem, fvm, hybrid,nets,nete,no_cslam, tl_f, tl_
deallocate(fld_phys,llimiter,fld_gll,qgll)
end subroutine phys2dyn_forcings_fvm

subroutine fvm2dyn(fld_fvm,fld_gll,hybrid,nets,nete,numlev,num_flds,fvm,llimiter)
! for multiple fields
subroutine fvm2dyntn(fld_fvm,fld_gll,hybrid,nets,nete,numlev,num_flds,fvm,llimiter)
use dimensions_mod, only: np, nhc, nc
use hybrid_mod , only: hybrid_t
use bndry_mod , only: ghost_exchange
@@ -244,7 +251,7 @@ subroutine fvm2dyn(fld_fvm,fld_gll,hybrid,nets,nete,numlev,num_flds,fvm,llimiter
do ie=nets,nete
call ghostpack(ghostBufQnhc_s, fld_fvm(:,:,:,:,ie),numlev*num_flds,0,ie)
end do
call ghost_exchange(hybrid,ghostbufQnhc_s,location='fvm2dyn')
call ghost_exchange(hybrid,ghostbufQnhc_s,location='fvm2dyntn')
do ie=nets,nete
call ghostunpack(ghostbufQnhc_s, fld_fvm(:,:,:,:,ie),numlev*num_flds,0,ie)
end do
@@ -257,7 +264,46 @@ subroutine fvm2dyn(fld_fvm,fld_gll,hybrid,nets,nete,numlev,num_flds,fvm,llimiter
call tensor_lagrange_interp(fvm(ie)%cubeboundary,np,nc,nhc,numlev,num_flds,fld_fvm(:,:,:,:,ie),&
fld_gll(:,:,:,:,ie),llimiter,iwidth,fvm(ie)%norm_elem_coord)
end do
end subroutine fvm2dyn
end subroutine fvm2dyntn

! for single field
subroutine fvm2dynt1(fld_fvm,fld_gll,hybrid,nets,nete,numlev,fvm,llimiter)
use dimensions_mod, only: np, nhc, nc
use hybrid_mod , only: hybrid_t
use bndry_mod , only: ghost_exchange
use edge_mod , only: ghostpack,ghostunpack
use fvm_mod , only: ghostBufQnhc_t1
!
integer , intent(in) :: nets,nete,numlev
real (kind=r8), intent(inout) :: fld_fvm(1-nhc:nc+nhc,1-nhc:nc+nhc,numlev,1,nets:nete)
real (kind=r8), intent(out) :: fld_gll(np,np,numlev,1,nets:nete)
type (hybrid_t) , intent(in) :: hybrid
type(fvm_struct) , intent(in) :: fvm(nets:nete)
logical , intent(in) :: llimiter(1)
integer :: ie, iwidth
!
!*********************************************
!
! halo exchange
!
!*********************************************
!
do ie=nets,nete
call ghostpack(ghostBufQnhc_t1, fld_fvm(:,:,:,1,ie),numlev,0,ie)
end do
call ghost_exchange(hybrid,ghostbufQnhc_t1,location='fvm2dynt1')
do ie=nets,nete
call ghostunpack(ghostbufQnhc_t1, fld_fvm(:,:,:,1,ie),numlev,0,ie)
end do
!
! mapping
!
iwidth=2
do ie=nets,nete
call tensor_lagrange_interp(fvm(ie)%cubeboundary,np,nc,nhc,numlev,1,fld_fvm(:,:,:,:,ie),&
fld_gll(:,:,:,:,ie),llimiter,iwidth,fvm(ie)%norm_elem_coord)
end do
end subroutine fvm2dynt1


subroutine fill_halo_phys(fld_phys,hybrid,nets,nete,num_lev,num_flds)
@@ -451,7 +497,7 @@ subroutine dyn2phys_all_vars(nets,nete,elem,fvm,&
tmp = 1.0_r8
inv_area = 1.0_r8/dyn2phys(tmp,elem(ie)%metdet(:,:))
phis_phys(:,ie) = RESHAPE(dyn2phys(elem(ie)%state%phis(:,:),elem(ie)%metdet(:,:),inv_area),SHAPE(phis_phys(:,ie)))
ps_phys(:,ie) = ptop
if (nc.ne.fv_nphys) then
tmp = 1.0_r8
do k=1,nlev
2 changes: 2 additions & 0 deletions src/dynamics/se/dycore/fvm_mod.F90
@@ -26,6 +26,7 @@ module fvm_mod

type (EdgeBuffer_t) :: edgeveloc
type (EdgeBuffer_t), public :: ghostBufQnhc_s
type (EdgeBuffer_t), public :: ghostBufQnhc_t1
type (EdgeBuffer_t), public :: ghostBufQnhc_vh
type (EdgeBuffer_t), public :: ghostBufQnhc_h
type (EdgeBuffer_t), public :: ghostBufQ1_h
@@ -487,6 +488,7 @@ subroutine fvm_init2(elem,fvm,hybrid,nets,nete)
! changes the values for reverse

call initghostbuffer(hybrid%par,ghostBufQnhc_s,elem,nlev*(ntrac+1),nhc,nc,nthreads=1)
call initghostbuffer(hybrid%par,ghostBufQnhc_t1,elem,nlev, nhc,nc,nthreads=1)
call initghostbuffer(hybrid%par,ghostBufQnhc_h,elem,nlev*(ntrac+1),nhc,nc,nthreads=horz_num_threads)
call initghostbuffer(hybrid%par,ghostBufQnhc_vh,elem,nlev*(ntrac+1),nhc,nc,nthreads=vert_num_threads*horz_num_threads)
klev = kmax_jet-kmin_jet+1
86 changes: 50 additions & 36 deletions src/dynamics/se/stepon.F90
@@ -282,9 +282,11 @@ subroutine diag_dynvar_ic(elem, fvm)
real(r8), allocatable :: ftmp(:,:,:)
real(r8), allocatable :: fld_fvm(:,:,:,:,:), fld_gll(:,:,:,:,:)
real(r8), allocatable :: fld_2d(:,:)
logical, allocatable :: llimiter(:)
logical :: llimiter(1)
real(r8) :: qtmp(np,np,nlev), dp_ref(np,np,nlev), ps_ref(np,np)
real(r8), allocatable :: factor_array(:,:,:)
integer :: astat
character(len=*), parameter :: prefix = 'diag_dynvar_ic: '
!----------------------------------------------------------------------------

tl_f = timelevel%n0
@@ -393,19 +395,28 @@ end if
end if

if (write_inithist()) then
allocate(fld_2d(np,np))
do ie = 1, nelemd
call get_ps(elem(ie)%state%Qdp(:,:,:,:,tl_Qdp), thermodynamic_active_species_idx_dycore,&
elem(ie)%state%dp3d(:,:,:,tl_f),fld_2d,hyai(1)*ps0)
do j = 1, np
do i = 1, np
ftmp(i+(j-1)*np,1,1) = fld_2d(i,j)
allocate(fld_2d(np,np))
do ie = 1, nelemd
call get_ps(elem(ie)%state%Qdp(:,:,:,:,tl_Qdp), thermodynamic_active_species_idx_dycore,&
elem(ie)%state%dp3d(:,:,:,tl_f),fld_2d,hyai(1)*ps0)
do j = 1, np
do i = 1, np
ftmp(i+(j-1)*np,1,1) = fld_2d(i,j)
end do
end do
end do
call outfld('PS&IC', ftmp(:,1,1), npsq, ie)
end do
deallocate(fld_2d)
if (fv_nphys < 1) allocate(factor_array(np,np,nlev))
call outfld('PS&IC', ftmp(:,1,1), npsq, ie)
end do
deallocate(fld_2d)
endif

deallocate(ftmp)

if (write_inithist()) then

if (fv_nphys < 1) then
allocate(factor_array(np,np,nlev),stat=astat)
if (astat /= 0) call endrun(prefix//"Allocate factor_array failed")
endif

do ie = 1, nelemd
call outfld('T&IC', RESHAPE(elem(ie)%state%T(:,:,:,tl_f), (/npsq,nlev/)), npsq, ie)
@@ -414,7 +425,7 @@ subroutine diag_dynvar_ic(elem, fvm)

if (fv_nphys < 1) then
call get_sum_species(elem(ie)%state%Qdp(:,:,:,:,tl_qdp), &
thermodynamic_active_species_idx_dycore, factor_array,dp_dry=elem(ie)%state%dp3d(:,:,:,tl_f))
factor_array(:,:,:) = 1.0_r8/factor_array(:,:,:)
do m_cnst = 1, qsize
if (cnst_type(m_cnst) == 'wet') then
@@ -436,40 +447,43 @@ subroutine diag_dynvar_ic(elem, fvm)
hybrid = config_thread_region(par,'serial')
call get_loop_ranges(hybrid, ibeg=nets, iend=nete)

allocate(fld_fvm(1-nhc:nc+nhc,1-nhc:nc+nhc,nlev,ntrac,nets:nete))
allocate(fld_gll(np,np,nlev,ntrac,nets:nete))
allocate(llimiter(ntrac))
allocate(factor_array(nc,nc,nlev))
allocate(fld_fvm(1-nhc:nc+nhc,1-nhc:nc+nhc,nlev,1,nets:nete),stat=astat)
if (astat /= 0) call endrun(prefix//"Allocate fld_fvm failed")
allocate(fld_gll(np,np,nlev,1,nets:nete),stat=astat)
if (astat /= 0) call endrun(prefix//"Allocate fld_gll failed")
allocate(factor_array(nc,nc,nlev),stat=astat)
if (astat /= 0) call endrun(prefix//"Allocate factor_array failed")

llimiter = .true.
do ie = nets, nete
call get_sum_species(fvm(ie)%c(1:nc,1:nc,:,:),thermodynamic_active_species_idx,factor_array)
factor_array(:,:,:) = 1.0_r8/factor_array(:,:,:)
do m_cnst = 1, ntrac
if (cnst_type(m_cnst) == 'wet') then
fld_fvm(1:nc,1:nc,:,m_cnst,ie) = fvm(ie)%c(1:nc,1:nc,:,m_cnst)*factor_array(:,:,:)
else
fld_fvm(1:nc,1:nc,:,m_cnst,ie) = fvm(ie)%c(1:nc,1:nc,:,m_cnst)
end if
end do
end do

call fvm2dyn(fld_fvm, fld_gll, hybrid, nets, nete, nlev, ntrac, fvm(nets:nete), llimiter)
do m_cnst = 1, ntrac
do ie = nets, nete

call get_sum_species(fvm(ie)%c(1:nc,1:nc,:,:),thermodynamic_active_species_idx,factor_array)
factor_array(:,:,:) = 1.0_r8/factor_array(:,:,:)

if (cnst_type(m_cnst) == 'wet') then
fld_fvm(1:nc,1:nc,:,1,ie) = fvm(ie)%c(1:nc,1:nc,:,m_cnst)*factor_array(:,:,:)
else
fld_fvm(1:nc,1:nc,:,1,ie) = fvm(ie)%c(1:nc,1:nc,:,m_cnst)
end if
end do

call fvm2dyn(fld_fvm, fld_gll, hybrid, nets, nete, nlev, fvm(nets:nete), llimiter)

do ie = nets, nete
do m_cnst = 1, ntrac
do ie = nets, nete
call outfld(trim(cnst_name(m_cnst))//'&IC', &
RESHAPE(fld_gll(:,:,:,m_cnst,ie), (/npsq,nlev/)), npsq, ie)
RESHAPE(fld_gll(:,:,:,:,ie), (/npsq,nlev/)), npsq, ie)
end do
end do

deallocate(fld_fvm)
deallocate(fld_gll)
deallocate(llimiter)
end if

deallocate(factor_array)
end if ! if (write_inithist)

deallocate(ftmp)
end if ! if (write_inithist)

end subroutine diag_dynvar_ic

