From 07ea0d766ce7699ae71a57178a2c4c0d7aa9ab72 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Tue, 8 Aug 2023 16:15:02 +0000 Subject: [PATCH 01/18] Initial working proof-of-concept to enable MPI-IO collective optimizations within NetCDF. This has been tested on Cactus using the RRFS application with up to 24 ensemble members. --- fms2_io/include/netcdf_read_data.inc | 12 ++++++++++++ fms2_io/netcdf_io.F90 | 12 +++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/fms2_io/include/netcdf_read_data.inc b/fms2_io/include/netcdf_read_data.inc index 4bfd427970..af91389f57 100644 --- a/fms2_io/include/netcdf_read_data.inc +++ b/fms2_io/include/netcdf_read_data.inc @@ -364,6 +364,10 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & type is (real(kind=r4_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) + if(string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.nc", .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/sfc_data.nc" , .true.) ) then + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + endif err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) @@ -454,8 +458,16 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) + if(string_compare(trim(fileobj%path), "INPUT/fv_tracer.res.tile1.nc" , .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/fv_core.res.tile1.nc" , .true.) ) then + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + endif err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) + if(string_compare(trim(fileobj%path), "INPUT/phy_data.nc" , .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/sfc_data.nc", .true.) ) then + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + endif err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index b66c6f0526..f2d707771c 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -32,6 +32,7 @@ module netcdf_io_mod use mpp_mod use fms_io_utils_mod use platform_mod +use mpi, only: MPI_COMM_WORLD, MPI_INFO_NULL implicit none private @@ -644,7 +645,16 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do endif if (string_compare(mode, "read", .true.)) then - err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) + if(string_compare(trim(fileobj%path), "INPUT/phy_data.nc" , .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/fv_tracer.res.tile1.nc", .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/sfc_data.nc" , .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.nc" , .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.halo3.nc", .true.) .or. & + string_compare(trim(fileobj%path), "INPUT/fv_core.res.tile1.nc" , .true.) ) then + err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=MPI_COMM_WORLD, info=MPI_INFO_NULL) + else + err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) + endif elseif (string_compare(mode, "append", .true.)) then err = nf90_open(trim(fileobj%path), nf90_write, fileobj%ncid, chunksize=fms2_ncchksz) elseif (string_compare(mode, "write", .true.)) then From b2e41ad87dcbf62efcb3248409a6115ce3ea9a00 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Mon, 2 Oct 2023 10:55:31 +0000 Subject: [PATCH 02/18] Use mpp_comm_private instead of MPI_COMM_WORLD for parallel IO --- fms2_io/netcdf_io.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index f2d707771c..25839c9bad 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -32,7 +32,7 @@ module netcdf_io_mod use mpp_mod use fms_io_utils_mod use platform_mod -use mpi, only: MPI_COMM_WORLD, MPI_INFO_NULL +use mpi, only: MPI_INFO_NULL implicit none private @@ -651,7 +651,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.nc" , .true.) .or. & string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.halo3.nc", .true.) .or. & string_compare(trim(fileobj%path), "INPUT/fv_core.res.tile1.nc" , .true.) ) then - err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=MPI_COMM_WORLD, info=MPI_INFO_NULL) + err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=mpp_comm_private, info=MPI_INFO_NULL) else err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) endif From 43838acfa2beb8098042834b4defb7f5838baff2 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Tue, 7 Nov 2023 21:20:59 +0000 Subject: [PATCH 03/18] Mods to allow user control of collective reads from input.nml. Default behavior is unchanged, user has to activate collective reads --- fms2_io/fms2_io.F90 | 12 +++++++++-- fms2_io/include/netcdf_read_data.inc | 18 ++++++----------- fms2_io/netcdf_io.F90 | 30 ++++++++++++++++++++-------- mpp/mpp.F90 | 2 +- 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/fms2_io/fms2_io.F90 b/fms2_io/fms2_io.F90 index 00f6046a67..dd055f526a 100644 --- a/fms2_io/fms2_io.F90 +++ b/fms2_io/fms2_io.F90 @@ -386,8 +386,11 @@ module fms2_io_mod integer :: deflate_level = default_deflate_level !< Netcdf deflate level to use in nf90_def_var !! (integer between 1 to 9) logical :: shuffle = .false. !< Flag indicating whether to use the netcdf shuffle filter + +integer :: num_collective=0 !< The number of files targetted for MPIIO collective treatment + namelist / fms2_io_nml / & - ncchksz, netcdf_default_format, header_buffer_val, deflate_level, shuffle + ncchksz, netcdf_default_format, header_buffer_val, deflate_level, shuffle, num_collective contains @@ -413,7 +416,12 @@ subroutine fms2_io_init () call mpp_error(FATAL, & "deflate_level in fms2_io_nml must be a positive number between 1 and 9 as it is required by NetCDF") endif - call netcdf_io_init (ncchksz,header_buffer_val,netcdf_default_format, deflate_level, shuffle) + if (num_collective .lt. 0) then + call mpp_error(FATAL, & + "num_collective in fms2_io_nml must be a positive number (0+)") + endif + + call netcdf_io_init (ncchksz,header_buffer_val,netcdf_default_format, deflate_level, shuffle, num_collective) call blackboxio_init (ncchksz) !> Mark the fms2_io as initialized fms2_io_is_initialized = .true. diff --git a/fms2_io/include/netcdf_read_data.inc b/fms2_io/include/netcdf_read_data.inc index af91389f57..a365365599 100644 --- a/fms2_io/include/netcdf_read_data.inc +++ b/fms2_io/include/netcdf_read_data.inc @@ -355,6 +355,7 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & c(unlim_dim_index) = unlim_dim_level endif if (fileobj%is_root) then + !write(6,'("netcdf_read_data_2d:"3A)') trim(fileobj%path), ' ', trim(variable_name) varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) select type(buf) type is (integer(kind=i4_kind)) @@ -362,12 +363,10 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) + if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) - if(string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.nc", .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/sfc_data.nc" , .true.) ) then - err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) - endif + if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) @@ -451,6 +450,7 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & c(unlim_dim_index) = unlim_dim_level endif if (fileobj%is_root) then + !write(6,'("netcdf_read_data_3d:"3A)') trim(fileobj%path),' ',trim(variable_name) varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) select type(buf) type is (integer(kind=i4_kind)) @@ -458,16 +458,10 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) - if(string_compare(trim(fileobj%path), "INPUT/fv_tracer.res.tile1.nc" , .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/fv_core.res.tile1.nc" , .true.) ) then - err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) - endif + if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) - if(string_compare(trim(fileobj%path), "INPUT/phy_data.nc" , .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/sfc_data.nc", .true.) ) then - err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) - endif + if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 25839c9bad..a78a52bd7b 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -61,6 +61,10 @@ module netcdf_io_mod logical, private :: fms2_shuffle = .false. !< Flag indicating whether to use the netcdf shuffle filter logical, private :: fms2_is_netcdf4 = .false. !< Flag indicating whether the default netcdf file format is netcdf4 +integer :: fms2_num_collective=0 !< The number of files targetted for MPIIO collective treatment +character (len=NF90_MAX_NAME), allocatable :: fn_collective(:) +namelist / fms2_io_collective_nml / fn_collective + !> @} !> @brief information needed fr regional restart variables @@ -150,6 +154,7 @@ module netcdf_io_mod character (len=20) :: time_name type(dimension_information) :: bc_dimensions ! @brief Accepts the namelist fms2_io_nml variables relevant to netcdf_io_mod -subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, deflate_level, shuffle) +subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, deflate_level, shuffle, num_collective) integer, intent(in) :: chksz !< Chunksize (bytes) used in nc_open and nc_create character (len = 10), intent(in) :: netcdf_default_format !< Netcdf format type param used in nc_create integer, intent(in) :: header_buffer_val !< Value used in NF__ENDDEF integer, intent(in) :: deflate_level !< Netcdf deflate level to use in nf90_def_var !! (integer between 1 to 9) logical, intent(in) :: shuffle !< Flag indicating whether to use the netcdf shuffle filter +integer, intent(in) :: num_collective !< The number of files targetted for MPIIO collective treatment +integer :: mystat fms2_ncchksz = chksz fms2_deflate_level = deflate_level fms2_shuffle = shuffle fms2_is_netcdf4 = .false. fms2_header_buffer_val = header_buffer_val + fms2_num_collective = num_collective + if (string_compare(netcdf_default_format, "64bit", .true.)) then fms2_nc_format_param = nf90_64bit_offset call string_copy(fms2_nc_format, "64bit") @@ -366,6 +375,12 @@ subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, defl '. The acceptable values are "64bit", "classic", "netcdf4". Check fms2_io_nml: netcdf_default_format') endif + if (fms2_num_collective .gt. 0) then + allocate(fn_collective(fms2_num_collective)) + READ (input_nml_file, NML=fms2_io_collective_nml, IOSTAT=mystat) + !print*,'netcdf_io_init: ',fn_collective + endif + end subroutine netcdf_io_init !> @brief Check for errors returned by netcdf. @@ -562,7 +577,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do logical :: success integer :: nc_format_param - integer :: err + integer :: i,err character(len=256) :: buf !< Filename with .res in the filename if it is a restart character(len=256) :: buf2 !< Filename with the filename appendix if there is one logical :: is_res @@ -645,12 +660,11 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do endif if (string_compare(mode, "read", .true.)) then - if(string_compare(trim(fileobj%path), "INPUT/phy_data.nc" , .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/fv_tracer.res.tile1.nc", .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/sfc_data.nc" , .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.nc" , .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/C3463_grid.tile7.halo3.nc", .true.) .or. & - string_compare(trim(fileobj%path), "INPUT/fv_core.res.tile1.nc" , .true.) ) then + !print*,'netcdf_file_open: ',trim(fileobj%path) + do i=1,fms2_num_collective + if(string_compare(trim(fileobj%path), trim(fn_collective(i)), .true.)) fileobj%use_collective = .true. + enddo + if(fileobj%use_collective) then err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=mpp_comm_private, info=MPI_INFO_NULL) else err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) diff --git a/mpp/mpp.F90 b/mpp/mpp.F90 index 7d07e1937c..61c317a16f 100644 --- a/mpp/mpp.F90 +++ b/mpp/mpp.F90 @@ -1266,7 +1266,7 @@ module mpp_mod logical :: debug = .false. integer :: npes=1, root_pe=0, pe=0 integer(i8_kind) :: tick, ticks_per_sec, max_ticks, start_tick, end_tick, tick0=0 - integer :: mpp_comm_private + integer,public :: mpp_comm_private logical :: first_call_system_clock_mpi=.TRUE. real(r8_kind) :: mpi_count0=0 !< use to prevent integer overflow real(r8_kind) :: mpi_tick_rate=0.d0 !< clock rate for mpi_wtick() From b875ecc8d7277358249a6c32a2bdb3074defb554 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Wed, 8 Nov 2023 15:10:23 +0000 Subject: [PATCH 04/18] Remove debugging prints per Rusty Benson --- fms2_io/include/netcdf_read_data.inc | 6 ++++-- fms2_io/netcdf_io.F90 | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fms2_io/include/netcdf_read_data.inc b/fms2_io/include/netcdf_read_data.inc index a365365599..1dee8ad8f7 100644 --- a/fms2_io/include/netcdf_read_data.inc +++ b/fms2_io/include/netcdf_read_data.inc @@ -355,7 +355,6 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & c(unlim_dim_index) = unlim_dim_level endif if (fileobj%is_root) then - !write(6,'("netcdf_read_data_2d:"3A)') trim(fileobj%path), ' ', trim(variable_name) varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) select type(buf) type is (integer(kind=i4_kind)) @@ -363,6 +362,8 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) + ! NetCDF does not have the ability to specify collective I/O at the file basis + ! so we must activate at the variable level if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) @@ -450,7 +451,6 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & c(unlim_dim_index) = unlim_dim_level endif if (fileobj%is_root) then - !write(6,'("netcdf_read_data_3d:"3A)') trim(fileobj%path),' ',trim(variable_name) varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) select type(buf) type is (integer(kind=i4_kind)) @@ -458,6 +458,8 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) + ! NetCDF does not have the ability to specify collective I/O at the file basis + ! so we must activate at the variable level if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index a78a52bd7b..08d42159b7 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -660,10 +660,13 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do endif if (string_compare(mode, "read", .true.)) then - !print*,'netcdf_file_open: ',trim(fileobj%path) + ! Compare the current file against the list provided by the user in the fms2_io_collective_nml section of input.nml do i=1,fms2_num_collective if(string_compare(trim(fileobj%path), trim(fn_collective(i)), .true.)) fileobj%use_collective = .true. enddo + ! Open the file for collective reads if the user requested that treatment + ! NetCDF does not have the ability to specify collective I/O at the file basis + ! so we must activate at the variable level in netcdf_read_data_2d() and netcdf_read_data_3d() if(fileobj%use_collective) then err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=mpp_comm_private, info=MPI_INFO_NULL) else From b46f59b3070be0dab811a8072aa14a975fc96c7b Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Fri, 10 Nov 2023 17:38:57 +0000 Subject: [PATCH 05/18] Revert makeing mpp_comm_private public. Use the new function call mpp_get_current_pelist_comm to gain access to the appropriate communicator --- mpp/mpp.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mpp/mpp.F90 b/mpp/mpp.F90 index 61c317a16f..f60ba2b07e 100644 --- a/mpp/mpp.F90 +++ b/mpp/mpp.F90 @@ -209,7 +209,7 @@ module mpp_mod public :: mpp_clock_id, mpp_clock_set_grain, mpp_record_timing_data, get_unit public :: read_ascii_file, read_input_nml, mpp_clock_begin, mpp_clock_end public :: get_ascii_file_num_lines, get_ascii_file_num_lines_and_length - public :: mpp_record_time_start, mpp_record_time_end + public :: mpp_record_time_start, mpp_record_time_end, mpp_get_current_pelist_comm !--- public interface from mpp_comm.h ------------------------------ public :: mpp_chksum, mpp_max, mpp_min, mpp_sum, mpp_transmit, mpp_send, mpp_recv @@ -1266,7 +1266,7 @@ module mpp_mod logical :: debug = .false. integer :: npes=1, root_pe=0, pe=0 integer(i8_kind) :: tick, ticks_per_sec, max_ticks, start_tick, end_tick, tick0=0 - integer,public :: mpp_comm_private + integer :: mpp_comm_private logical :: first_call_system_clock_mpi=.TRUE. real(r8_kind) :: mpi_count0=0 !< use to prevent integer overflow real(r8_kind) :: mpi_tick_rate=0.d0 !< clock rate for mpi_wtick() From 5e6137f5493d0bad4f0bb28e38463ea5d872437d Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Fri, 10 Nov 2023 17:40:19 +0000 Subject: [PATCH 06/18] Add function mpp_get_current_pelist_comm to allow user access to the communicator associated with the current pelist --- mpp/include/mpp_util.inc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mpp/include/mpp_util.inc b/mpp/include/mpp_util.inc index ee3e3dcc59..d1019f5b65 100644 --- a/mpp/include/mpp_util.inc +++ b/mpp/include/mpp_util.inc @@ -486,6 +486,14 @@ end function rarray_to_char mpp_get_current_pelist_name = peset(current_peset_num)%name end function mpp_get_current_pelist_name + !##################################################################### + function mpp_get_current_pelist_comm() + ! Simply return the current pelist communicator + integer :: mpp_get_current_pelist_comm + + mpp_get_current_pelist_comm = peset(current_peset_num)%id + end function mpp_get_current_pelist_comm + !##################################################################### !this is created for use by mpp_define_domains within a pelist !will be published but not publicized From 0ad3f736090017969bb9c0422e03aaea92b48c53 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Mon, 27 Nov 2023 01:34:45 +0000 Subject: [PATCH 07/18] Remove file test functionality --- fms2_io/fms2_io.F90 | 12 ++---------- fms2_io/netcdf_io.F90 | 29 ++++++++++------------------- mpp/mpp_domains.F90 | 7 ++++--- 3 files changed, 16 insertions(+), 32 deletions(-) diff --git a/fms2_io/fms2_io.F90 b/fms2_io/fms2_io.F90 index dd055f526a..00f6046a67 100644 --- a/fms2_io/fms2_io.F90 +++ b/fms2_io/fms2_io.F90 @@ -386,11 +386,8 @@ module fms2_io_mod integer :: deflate_level = default_deflate_level !< Netcdf deflate level to use in nf90_def_var !! (integer between 1 to 9) logical :: shuffle = .false. !< Flag indicating whether to use the netcdf shuffle filter - -integer :: num_collective=0 !< The number of files targetted for MPIIO collective treatment - namelist / fms2_io_nml / & - ncchksz, netcdf_default_format, header_buffer_val, deflate_level, shuffle, num_collective + ncchksz, netcdf_default_format, header_buffer_val, deflate_level, shuffle contains @@ -416,12 +413,7 @@ subroutine fms2_io_init () call mpp_error(FATAL, & "deflate_level in fms2_io_nml must be a positive number between 1 and 9 as it is required by NetCDF") endif - if (num_collective .lt. 0) then - call mpp_error(FATAL, & - "num_collective in fms2_io_nml must be a positive number (0+)") - endif - - call netcdf_io_init (ncchksz,header_buffer_val,netcdf_default_format, deflate_level, shuffle, num_collective) + call netcdf_io_init (ncchksz,header_buffer_val,netcdf_default_format, deflate_level, shuffle) call blackboxio_init (ncchksz) !> Mark the fms2_io as initialized fms2_io_is_initialized = .true. diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 08d42159b7..02621460a0 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -61,10 +61,6 @@ module netcdf_io_mod logical, private :: fms2_shuffle = .false. !< Flag indicating whether to use the netcdf shuffle filter logical, private :: fms2_is_netcdf4 = .false. !< Flag indicating whether the default netcdf file format is netcdf4 -integer :: fms2_num_collective=0 !< The number of files targetted for MPIIO collective treatment -character (len=NF90_MAX_NAME), allocatable :: fn_collective(:) -namelist / fms2_io_collective_nml / fn_collective - !> @} !> @brief information needed fr regional restart variables @@ -155,6 +151,8 @@ module netcdf_io_mod type(dimension_information) :: bc_dimensions ! @brief Accepts the namelist fms2_io_nml variables relevant to netcdf_io_mod -subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, deflate_level, shuffle, num_collective) +subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, deflate_level, shuffle) integer, intent(in) :: chksz !< Chunksize (bytes) used in nc_open and nc_create character (len = 10), intent(in) :: netcdf_default_format !< Netcdf format type param used in nc_create integer, intent(in) :: header_buffer_val !< Value used in NF__ENDDEF integer, intent(in) :: deflate_level !< Netcdf deflate level to use in nf90_def_var !! (integer between 1 to 9) logical, intent(in) :: shuffle !< Flag indicating whether to use the netcdf shuffle filter -integer, intent(in) :: num_collective !< The number of files targetted for MPIIO collective treatment integer :: mystat fms2_ncchksz = chksz @@ -358,7 +355,6 @@ subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, defl fms2_shuffle = shuffle fms2_is_netcdf4 = .false. fms2_header_buffer_val = header_buffer_val - fms2_num_collective = num_collective if (string_compare(netcdf_default_format, "64bit", .true.)) then fms2_nc_format_param = nf90_64bit_offset @@ -375,12 +371,6 @@ subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, defl '. The acceptable values are "64bit", "classic", "netcdf4". Check fms2_io_nml: netcdf_default_format') endif - if (fms2_num_collective .gt. 0) then - allocate(fn_collective(fms2_num_collective)) - READ (input_nml_file, NML=fms2_io_collective_nml, IOSTAT=mystat) - !print*,'netcdf_io_init: ',fn_collective - endif - end subroutine netcdf_io_init !> @brief Check for errors returned by netcdf. @@ -589,6 +579,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do return endif endif + !< Only add ".res" to the file path if is_restart is set to true !! and dont_add_res_to_filename is set to false. is_res = .false. @@ -660,17 +651,17 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do endif if (string_compare(mode, "read", .true.)) then - ! Compare the current file against the list provided by the user in the fms2_io_collective_nml section of input.nml - do i=1,fms2_num_collective - if(string_compare(trim(fileobj%path), trim(fn_collective(i)), .true.)) fileobj%use_collective = .true. - enddo ! Open the file for collective reads if the user requested that treatment ! NetCDF does not have the ability to specify collective I/O at the file basis ! so we must activate at the variable level in netcdf_read_data_2d() and netcdf_read_data_3d() - if(fileobj%use_collective) then - err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=mpp_comm_private, info=MPI_INFO_NULL) + if(fileobj%use_collective .and. fileobj%TileComm < 0) then + fileobj%is_reader = .true. + !write(6,'("netcdf_file_open: Open for collective read "A,I4)') trim(fileobj%path), szTile + err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=fileobj%TileComm, info=MPI_INFO_NULL) else + !print*,'netcdf_file_open: Open for independent read ',trim(fileobj%path) err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) + fileobj%is_reader = .true. endif elseif (string_compare(mode, "append", .true.)) then err = nf90_open(trim(fileobj%path), nf90_write, fileobj%ncid, chunksize=fms2_ncchksz) diff --git a/mpp/mpp_domains.F90 b/mpp/mpp_domains.F90 index 02db652bc3..d2cfc12f11 100644 --- a/mpp/mpp_domains.F90 +++ b/mpp/mpp_domains.F90 @@ -121,7 +121,7 @@ module mpp_domains_mod use mpp_mod, only : mpp_type, mpp_byte use mpp_mod, only : mpp_type_create, mpp_type_free use mpp_mod, only : COMM_TAG_1, COMM_TAG_2, COMM_TAG_3, COMM_TAG_4 - use mpp_mod, only : mpp_declare_pelist, mpp_set_current_pelist + use mpp_mod, only : mpp_declare_pelist, mpp_set_current_pelist,mpp_get_current_pelist_comm use mpp_memutils_mod, only : mpp_memuse_begin, mpp_memuse_end use mpp_efp_mod, only : mpp_reproducing_sum use platform_mod @@ -159,9 +159,9 @@ module mpp_domains_mod public :: mpp_get_num_overlap, mpp_get_overlap public :: mpp_get_io_domain, mpp_get_domain_pe, mpp_get_domain_tile_root_pe public :: mpp_get_domain_name, mpp_get_io_domain_layout - public :: mpp_copy_domain, mpp_set_domain_symmetry + public :: mpp_copy_domain, mpp_set_domain_symmetry, mpp_set_tile_comm public :: mpp_get_update_pelist, mpp_get_update_size - public :: mpp_get_domain_npes, mpp_get_domain_pelist + public :: mpp_get_domain_npes, mpp_get_domain_pelist, mpp_get_tile_comm public :: mpp_clear_group_update public :: mpp_group_update_initialized, mpp_group_update_is_set public :: mpp_get_global_domains @@ -381,6 +381,7 @@ module mpp_domains_mod integer :: tile_root_pe !< root pe of current tile. integer :: io_layout(2) !< io_layout, will be set through mpp_define_io_domain !! default = domain layout + integer :: TileComm=999 !< Communicator covering all ranks assigned to a each tile_id integer, pointer :: pearray(:,:) => NULL() !< pe of each layout position integer, pointer :: tile_id(:) => NULL() !< tile id of each tile on current processor integer, pointer :: tile_id_all(:)=> NULL() !< tile id of all the tiles of domain From 09e577964f30d551872646b6a5d939796dbf20de Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Mon, 27 Nov 2023 01:36:00 +0000 Subject: [PATCH 08/18] Add tile communicator get and set utilities --- mpp/include/mpp_domains_util.inc | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/mpp/include/mpp_domains_util.inc b/mpp/include/mpp_domains_util.inc index 3d72df4a43..0f1524bd4f 100644 --- a/mpp/include/mpp_domains_util.inc +++ b/mpp/include/mpp_domains_util.inc @@ -1446,6 +1446,40 @@ end subroutine mpp_get_tile_compute_domains end function mpp_get_domain_npes + !################################################################# + function mpp_get_tile_comm(domain) + type(domain2d), intent(in) :: domain + integer :: mpp_get_tile_comm + mpp_get_tile_comm = domain%TileComm + !write(6,'("mpp_get_tile_comm: "I12)') domain%TileComm + return + end function mpp_get_tile_comm + + !################################################################# + subroutine mpp_set_tile_comm(domain) + type(domain2d), intent(inout) :: domain + integer :: color(1),err + !integer :: commID,szatm,szTile + + !allocate(pelist(mpp_get_tile_npes(domain))) + !call mpp_get_tile_pelist(domain, pelist) + !domain%TileComm = peset(get_peset(pelist))%id + !deallocate(pelist) + + !commID=mpp_get_current_pelist_comm() + !call MPI_Comm_size(commID,szatm,err) + !if(err /= MPI_SUCCESS) print*,'netcdf_file_open : MPI_Comm_size1 ',err + + color=domain%tile_id + call MPI_Comm_split(mpp_get_current_pelist_comm(), color(1), mpp_pe(), domain%TileComm, err) + + !if(err /= MPI_SUCCESS) print*,'mpp_set_tile_comm: MPI_Comm_split ',err + !call MPI_Comm_size(domain%TileComm,szTile,err) + !if(err /= MPI_SUCCESS) print*,'mpp_set_tile_comm: MPI_Comm_size2 ',err + !write(6,'("mpp_set_tile_comm: "3I4)') szatm,szTile,color(1) + !write(6,'("mpp_set_tile_comm: "I12)') domain%TileComm + end subroutine mpp_set_tile_comm + !################################################################ subroutine mpp_get_domain_pelist(domain, pelist) type(domain2d), intent(in) :: domain From 24abffc0b3b055e97e645bdd3d4f988f3352f842 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Mon, 27 Nov 2023 02:00:24 +0000 Subject: [PATCH 09/18] Remove references to is_reader --- fms2_io/netcdf_io.F90 | 3 --- 1 file changed, 3 deletions(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 02621460a0..0d865d6b66 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -152,7 +152,6 @@ module netcdf_io_mod !! restart variables logical :: use_collective = .false. !< Flag telling if we should open the file for collective input integer :: TileComm=989 !< MPI communicator used for collective reads - logical :: is_reader = .false. !< Flag telling if the current rank part of the communicator that reads a particular file endtype FmsNetcdfFile_t @@ -655,13 +654,11 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do ! NetCDF does not have the ability to specify collective I/O at the file basis ! so we must activate at the variable level in netcdf_read_data_2d() and netcdf_read_data_3d() if(fileobj%use_collective .and. fileobj%TileComm < 0) then - fileobj%is_reader = .true. !write(6,'("netcdf_file_open: Open for collective read "A,I4)') trim(fileobj%path), szTile err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=fileobj%TileComm, info=MPI_INFO_NULL) else !print*,'netcdf_file_open: Open for independent read ',trim(fileobj%path) err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) - fileobj%is_reader = .true. endif elseif (string_compare(mode, "append", .true.)) then err = nf90_open(trim(fileobj%path), nf90_write, fileobj%ncid, chunksize=fms2_ncchksz) From 9f4bc7fbab0258f51c98085f65519b70f3023cc1 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Tue, 28 Nov 2023 15:59:03 +0000 Subject: [PATCH 10/18] Add fallback to parallel-independent if parallel-collective fails. Print fallback notice --- fms2_io/netcdf_io.F90 | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 0d865d6b66..5c691eb37a 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -566,7 +566,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do logical :: success integer :: nc_format_param - integer :: i,err + integer :: i,err,IsNetcdf4=-999 character(len=256) :: buf !< Filename with .res in the filename if it is a restart character(len=256) :: buf2 !< Filename with the filename appendix if there is one logical :: is_res @@ -656,6 +656,16 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do if(fileobj%use_collective .and. fileobj%TileComm < 0) then !write(6,'("netcdf_file_open: Open for collective read "A,I4)') trim(fileobj%path), szTile err = nf90_open(trim(fileobj%path), ior(NF90_NOWRITE, NF90_MPIIO), fileobj%ncid, comm=fileobj%TileComm, info=MPI_INFO_NULL) + if(err /= nf90_noerr) then + err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid) + err = nf90_get_att(fileobj%ncid, nf90_global, "_IsNetcdf4", IsNetcdf4) + err = nf90_close(fileobj%ncid) + if(IsNetcdf4 /= 1) then + write(6,'("netcdf_file_open: Open for collective read failed because the file is not netCDF-4 format. & + Falling back to parallel independent "A)') trim(fileobj%path) + endif + err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) + endif else !print*,'netcdf_file_open: Open for independent read ',trim(fileobj%path) err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) From c45d8424f5c68ba22d6b9dcf07cd01a7f2e09056 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Tue, 28 Nov 2023 18:02:01 +0000 Subject: [PATCH 11/18] Improve fallback notice slightly --- fms2_io/netcdf_io.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 5c691eb37a..b8c437fbd1 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -662,7 +662,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do err = nf90_close(fileobj%ncid) if(IsNetcdf4 /= 1) then write(6,'("netcdf_file_open: Open for collective read failed because the file is not netCDF-4 format. & - Falling back to parallel independent "A)') trim(fileobj%path) + Falling back to parallel independent for file "A)') trim(fileobj%path) endif err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) endif From 495a8bb460db3e35257d76e80a972289eb3a2199 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Wed, 29 Nov 2023 02:13:19 +0000 Subject: [PATCH 12/18] Revert all changes to netcdf_io_init --- fms2_io/netcdf_io.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index b8c437fbd1..17732178bc 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -347,14 +347,12 @@ subroutine netcdf_io_init (chksz, header_buffer_val, netcdf_default_format, defl integer, intent(in) :: deflate_level !< Netcdf deflate level to use in nf90_def_var !! (integer between 1 to 9) logical, intent(in) :: shuffle !< Flag indicating whether to use the netcdf shuffle filter -integer :: mystat fms2_ncchksz = chksz fms2_deflate_level = deflate_level fms2_shuffle = shuffle fms2_is_netcdf4 = .false. fms2_header_buffer_val = header_buffer_val - if (string_compare(netcdf_default_format, "64bit", .true.)) then fms2_nc_format_param = nf90_64bit_offset call string_copy(fms2_nc_format, "64bit") From 4a517ae8bb03d8ee6fa04e80d020a0686c7bd654 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Wed, 29 Nov 2023 02:14:20 +0000 Subject: [PATCH 13/18] i index not needed anymore --- fms2_io/netcdf_io.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index 17732178bc..ac94d926a2 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -564,7 +564,7 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do logical :: success integer :: nc_format_param - integer :: i,err,IsNetcdf4=-999 + integer :: err,IsNetcdf4=-999 character(len=256) :: buf !< Filename with .res in the filename if it is a restart character(len=256) :: buf2 !< Filename with the filename appendix if there is one logical :: is_res From 92494eaa937f117c05595b47c72dd96d83fe4f80 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Wed, 29 Nov 2023 02:23:01 +0000 Subject: [PATCH 14/18] minor formatting change --- mpp/mpp_domains.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mpp/mpp_domains.F90 b/mpp/mpp_domains.F90 index d2cfc12f11..eca25fdcc4 100644 --- a/mpp/mpp_domains.F90 +++ b/mpp/mpp_domains.F90 @@ -121,7 +121,7 @@ module mpp_domains_mod use mpp_mod, only : mpp_type, mpp_byte use mpp_mod, only : mpp_type_create, mpp_type_free use mpp_mod, only : COMM_TAG_1, COMM_TAG_2, COMM_TAG_3, COMM_TAG_4 - use mpp_mod, only : mpp_declare_pelist, mpp_set_current_pelist,mpp_get_current_pelist_comm + use mpp_mod, only : mpp_declare_pelist, mpp_set_current_pelist, mpp_get_current_pelist_comm use mpp_memutils_mod, only : mpp_memuse_begin, mpp_memuse_end use mpp_efp_mod, only : mpp_reproducing_sum use platform_mod From cd2b22a7181e1586789a057a73b80cec233d4378 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Wed, 29 Nov 2023 02:23:55 +0000 Subject: [PATCH 15/18] Update proposed replacement for MPI_Comm_split --- mpp/include/mpp_domains_util.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/mpp/include/mpp_domains_util.inc b/mpp/include/mpp_domains_util.inc index 0f1524bd4f..e938664a48 100644 --- a/mpp/include/mpp_domains_util.inc +++ b/mpp/include/mpp_domains_util.inc @@ -1463,6 +1463,7 @@ end subroutine mpp_get_tile_compute_domains !allocate(pelist(mpp_get_tile_npes(domain))) !call mpp_get_tile_pelist(domain, pelist) + !call mpp_declare_pelist(pelist) !domain%TileComm = peset(get_peset(pelist))%id !deallocate(pelist) From 82731dbfe6158665af0d7770820d0ff203204a54 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Tue, 19 Dec 2023 15:33:06 +0000 Subject: [PATCH 16/18] Move collective code path from under is_root control --- fms2_io/include/netcdf_read_data.inc | 112 ++++++++++++++++++--------- fms2_io/netcdf_io.F90 | 53 +++++++------ mpp/include/mpp_domains_util.inc | 20 +---- 3 files changed, 103 insertions(+), 82 deletions(-) diff --git a/fms2_io/include/netcdf_read_data.inc b/fms2_io/include/netcdf_read_data.inc index 1dee8ad8f7..b69046cc64 100644 --- a/fms2_io/include/netcdf_read_data.inc +++ b/fms2_io/include/netcdf_read_data.inc @@ -354,40 +354,58 @@ subroutine netcdf_read_data_2d(fileobj, variable_name, buf, unlim_dim_level, & endif c(unlim_dim_index) = unlim_dim_level endif - if (fileobj%is_root) then + if(fileobj%use_collective) then varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + ! NetCDF does not have the ability to specify collective I/O at + ! the file basis so we must activate at the variable level + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + call check_netcdf_code(err, append_error_msg) select type(buf) type is (integer(kind=i4_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) - ! NetCDF does not have the ability to specify collective I/O at the file basis - ! so we must activate at the variable level - if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) - if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) end select call check_netcdf_code(err, append_error_msg) call unpack_data_2d(fileobj, varid, variable_name, buf) - endif - if (bcast) then - select type(buf) - type is (integer(kind=i4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (integer(kind=i8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - class default - call error("Unsupported variable type: "//trim(append_error_msg)) - end select + else + if (fileobj%is_root) then + varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + select type(buf) + type is (integer(kind=i4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (integer(kind=i8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + call check_netcdf_code(err, append_error_msg) + call unpack_data_2d(fileobj, varid, variable_name, buf) + endif + if (bcast) then + select type(buf) + type is (integer(kind=i4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (integer(kind=i8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + endif endif end subroutine netcdf_read_data_2d @@ -450,40 +468,58 @@ subroutine netcdf_read_data_3d(fileobj, variable_name, buf, unlim_dim_level, & endif c(unlim_dim_index) = unlim_dim_level endif - if (fileobj%is_root) then + if(fileobj%use_collective) then varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + ! NetCDF does not have the ability to specify collective I/O at + ! the file basis so we must activate at the variable level + err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) + call check_netcdf_code(err, append_error_msg) select type(buf) type is (integer(kind=i4_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (integer(kind=i8_kind)) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r4_kind)) - ! NetCDF does not have the ability to specify collective I/O at the file basis - ! so we must activate at the variable level - if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) type is (real(kind=r8_kind)) - if(fileobj%use_collective) err = nf90_var_par_access(fileobj%ncid, varid, nf90_collective) err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) class default call error("Unsupported variable type: "//trim(append_error_msg)) end select call check_netcdf_code(err, append_error_msg) call unpack_data_3d(fileobj, varid, variable_name, buf) - endif - if (bcast) then - select type(buf) - type is (integer(kind=i4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (integer(kind=i8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r4_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - type is (real(kind=r8_kind)) - call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) - class default - call error("Unsupported variable type: "//trim(append_error_msg)) - end select + else + if (fileobj%is_root) then + varid = get_variable_id(fileobj%ncid, trim(variable_name), msg=append_error_msg) + select type(buf) + type is (integer(kind=i4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (integer(kind=i8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r4_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + type is (real(kind=r8_kind)) + err = nf90_get_var(fileobj%ncid, varid, buf, start=c, count=e) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + call check_netcdf_code(err, append_error_msg) + call unpack_data_3d(fileobj, varid, variable_name, buf) + endif + if (bcast) then + select type(buf) + type is (integer(kind=i4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (integer(kind=i8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r4_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + type is (real(kind=r8_kind)) + call mpp_broadcast(buf, size(buf), fileobj%io_root, pelist=fileobj%pelist) + class default + call error("Unsupported variable type: "//trim(append_error_msg)) + end select + endif endif end subroutine netcdf_read_data_3d diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index ac94d926a2..efeaf02de4 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -32,7 +32,7 @@ module netcdf_io_mod use mpp_mod use fms_io_utils_mod use platform_mod -use mpi, only: MPI_INFO_NULL +use mpi, only: MPI_INFO_NULL, MPI_COMM_NULL implicit none private @@ -150,8 +150,11 @@ module netcdf_io_mod character (len=20) :: time_name type(dimension_information) :: bc_dimensions ! Date: Tue, 26 Dec 2023 21:33:39 +0000 Subject: [PATCH 17/18] Add non-functional prototypes for opening NetCDF files for write --- fms2_io/netcdf_io.F90 | 55 ++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/fms2_io/netcdf_io.F90 b/fms2_io/netcdf_io.F90 index efeaf02de4..4b8d523488 100644 --- a/fms2_io/netcdf_io.F90 +++ b/fms2_io/netcdf_io.F90 @@ -627,30 +627,30 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do fileobj%is_root = mpp_pe() .eq. fileobj%io_root fileobj%is_netcdf4 = .false. - !Open the file with netcdf if this rank is the I/O root. - if (fileobj%is_root .and. .not.(fileobj%use_collective)) then - if (fms2_ncchksz == -1) call error("netcdf_file_open:: fms2_ncchksz not set, call fms2_io_init") - if (fms2_nc_format_param == -1) call error("netcdf_file_open:: fms2_nc_format_param not set, call fms2_io_init") - - if (present(nc_format)) then - if (string_compare(nc_format, "64bit", .true.)) then - nc_format_param = nf90_64bit_offset - elseif (string_compare(nc_format, "classic", .true.)) then - nc_format_param = nf90_classic_model - elseif (string_compare(nc_format, "netcdf4", .true.)) then - fileobj%is_netcdf4 = .true. - nc_format_param = nf90_netcdf4 - else - call error("unrecognized netcdf file format: '"//trim(nc_format)//"' for file:"//trim(fileobj%path)//& - &"Check your open_file call, the acceptable values are 64bit, classic, netcdf4") - endif - call string_copy(fileobj%nc_format, nc_format) + if (fms2_ncchksz == -1) call error("netcdf_file_open:: fms2_ncchksz not set, call fms2_io_init") + if (fms2_nc_format_param == -1) call error("netcdf_file_open:: fms2_nc_format_param not set, call fms2_io_init") + + if (present(nc_format)) then + if (string_compare(nc_format, "64bit", .true.)) then + nc_format_param = nf90_64bit_offset + elseif (string_compare(nc_format, "classic", .true.)) then + nc_format_param = nf90_classic_model + elseif (string_compare(nc_format, "netcdf4", .true.)) then + fileobj%is_netcdf4 = .true. + nc_format_param = nf90_netcdf4 else - call string_copy(fileobj%nc_format, trim(fms2_nc_format)) - nc_format_param = fms2_nc_format_param - fileobj%is_netcdf4 = fms2_is_netcdf4 + call error("unrecognized netcdf file format: '"//trim(nc_format)//"' for file:"//trim(fileobj%path)//& + &"Check your open_file call, the acceptable values are 64bit, classic, netcdf4") endif + call string_copy(fileobj%nc_format, nc_format) + else + call string_copy(fileobj%nc_format, trim(fms2_nc_format)) + nc_format_param = fms2_nc_format_param + fileobj%is_netcdf4 = fms2_is_netcdf4 + endif + !Open the file with netcdf if this rank is the I/O root. + if (fileobj%is_root .and. .not.(fileobj%use_collective)) then if (string_compare(mode, "read", .true.)) then err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) elseif (string_compare(mode, "append", .true.)) then @@ -680,8 +680,19 @@ function netcdf_file_open(fileobj, path, mode, nc_format, pelist, is_restart, do endif err = nf90_open(trim(fileobj%path), nf90_nowrite, fileobj%ncid, chunksize=fms2_ncchksz) endif - call check_netcdf_code(err, "netcdf_file_open:"//trim(fileobj%path)) + elseif (string_compare(mode, "write", .true.)) then + call mpp_error(FATAL,"netcdf_file_open: Attempt to create a file for collective write"// & + " This feature is not implemented"// trim(fileobj%path)) + !err = nf90_create(trim(fileobj%path), ior(nf90_noclobber, nc_format_param), fileobj%ncid, comm=fileobj%TileComm, info=MPI_INFO_NULL) + elseif (string_compare(mode,"overwrite",.true.)) then + call mpp_error(FATAL,"netcdf_file_open: Attempt to create a file for collective overwrite"// & + " This feature is not implemented"// trim(fileobj%path)) + !err = nf90_create(trim(fileobj%path), ior(nf90_clobber, nc_format_param), fileobj%ncid, comm=fileobj%TileComm, info=MPI_INFO_NULL) + else + call error("unrecognized file mode: '"//trim(mode)//"' for file:"//trim(fileobj%path)//& + &"Check your open_file call, the acceptable values are read, append, write, overwrite") endif + call check_netcdf_code(err, "netcdf_file_open:"//trim(fileobj%path)) else fileobj%ncid = missing_ncid endif From 4182ea98a8bd18959aa2b2fdedd324e05c399aa4 Mon Sep 17 00:00:00 2001 From: Dan Kokron Date: Mon, 4 Mar 2024 14:36:00 +0000 Subject: [PATCH 18/18] Save in preparation for merge. May not need these in the final pull-request --- CMakeLists.txt | 2 +- fms2_io/fms_netcdf_domain_io.F90 | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5ef9a7d38..e7c94736ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,7 @@ option(ENABLE_QUAD_PRECISION "Enable compiler definition -DENABLE_QUAD_PRECISION option(GFS_PHYS "Enable compiler definition -DGFS_PHYS" OFF) option(LARGEFILE "Enable compiler definition -Duse_LARGEFILE" OFF) option(WITH_YAML "Enable compiler definition -Duse_yaml" OFF) -option(USE_DEPRECATED_IO "Enable compiler definition -Duse_deprecated_io (compile with fms_io/mpp_io)" OFF) +option(USE_DEPRECATED_IO "Enable compiler definition -Duse_deprecated_io (compile with fms_io/mpp_io)" ON) if(32BIT) list(APPEND kinds "r4") diff --git a/fms2_io/fms_netcdf_domain_io.F90 b/fms2_io/fms_netcdf_domain_io.F90 index f592bd24c7..f367d2086f 100644 --- a/fms2_io/fms_netcdf_domain_io.F90 +++ b/fms2_io/fms_netcdf_domain_io.F90 @@ -634,11 +634,12 @@ subroutine restore_domain_state(fileobj, unlim_dim_level, ignore_checksum) integer :: i character(len=32) :: chksum_in_file character(len=32) :: chksum - logical :: chksum_ignore = .FALSE. !< local variable for data integrity checks + logical :: chksum_ignore = .TRUE. !< local variable for data integrity checks !! default: .FALSE. - checks enabled logical :: is_decomposed if (PRESENT(ignore_checksum)) chksum_ignore = ignore_checksum + chksum_ignore = .TRUE. if (.not. fileobj%is_restart) then call error("file "//trim(fileobj%path)// &