From 2d0696890eba0cb0f4fe95ec12f8535f7af89611 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Tue, 15 Feb 2022 09:16:28 +1100 Subject: [PATCH 01/20] #64 Allow not using a t-mask with non-periodic boundary conditions. --- doc/api.rst | 9 +++------ finite_difference/src/grid_mod.f90 | 20 +++----------------- 2 files changed, 6 insertions(+), 23 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 4b99718..d05a0f6 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -49,7 +49,7 @@ Three types of boundary condition are currently supported: Name Description =============== ========================================= GO_BC_NONE No boundary conditions are applied. -GO_BC_EXTERNAL Some external forcing is applied. This must be implemented by a kernel. The domain must be defined with a T-point mask (see :ref:`gocean1.0-grid-init`). +GO_BC_EXTERNAL Some external forcing is applied. This must be implemented by a kernel. GO_BC_PERIODIC Periodic boundary conditions are applied. =============== ========================================= @@ -98,11 +98,8 @@ object. This is done via a call to the ``grid_init`` subroutine:: !! wet (1), dry (0) or external (-1). integer, dimension(m,n), intent(in), optional :: tmask -If no T-mask is supplied then this routine configures the grid -appropriately for an all-wet domain with periodic boundary conditions -in both the *x*- and *y*-dimensions. It should also be noted that -currently only grids with constant resolution in *x* and *y* are -supported by this routine. +It should be noted that currently only grids with constant +resolution in *x* and *y* are supported by this routine. .. _gocean1.0-fields: diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 272f037..759e282 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -67,9 +67,8 @@ module grid_mod !! This is the key quantity that determines the region that !! is actually simulated. However, we also support the !! specification of a model consisting entirely of wet points - !! with periodic boundary conditions. Since this does not - !! require a T-mask, we do not allocate this array for that - !! case. + !! Since this does not require a T-mask, we do not allocate + !! this array for that case. integer, allocatable :: tmask(:,:) !> Pointer to tmask on remote device (if any) type(c_ptr) :: tmask_device @@ -294,8 +293,7 @@ end function grid_constructor !! @param[in] dxarg Grid spacing in x dimension !! @param[in] dyarg Grid spacing in y dimension !! @param[in] tmask Array holding the T-point mask which defines - !! the contents of the local domain. Need not be - !! supplied if domain is all wet and has PBCs. + !! the contents of the local domain. subroutine grid_init(grid, dxarg, dyarg, tmask) use decomposition_mod, only: subdomain_type, decomposition_type use parallel_mod, only: map_comms, get_rank, get_num_ranks, on_master @@ -395,18 +393,6 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) do ji = xstop+2, grid%nx grid%tmask(ji, :) = grid%tmask(xstop+1, :) end do - else - ! No T-mask supplied. Check that grid has PBCs in both - ! x and y dimensions otherwise we won't know what to do. - if( .not. ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .and. & - (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) )then - call gocean_stop('grid_init: ERROR: No T-mask supplied and '// & - 'grid does not have periodic boundary conditions!') - end if - !> TODO add support for PBCs in parallel - if(get_num_ranks() > 1)then - call gocean_stop('grid_init: PBCs not yet implemented with MPI') - end if end if ! T-mask supplied ! For a regular, orthogonal mesh the spatial resolution is constant From 4500abd0b2be5df0a7392a0dc85acbea69fd2f19 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Thu, 24 Feb 2022 16:11:42 +1100 Subject: [PATCH 02/20] #64 Added support for gathering a distributed field into a Fortran array. --- finite_difference/src/field_mod.f90 | 94 +++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 4 deletions(-) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index f6fc26e..37e5556 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -161,6 +161,7 @@ end subroutine write_to_device_f_interface procedure, pass :: read_from_device procedure, pass :: write_to_device procedure, public :: halo_exchange + procedure, public :: gather_inner_data end type r2d_field !> Interface for the copy_field operation. Overloaded to take @@ -237,9 +238,10 @@ end subroutine write_to_device_f_interface !=================================================== - function r2d_field_constructor(grid, & + function r2d_field_constructor(grid, & grid_points, & - do_tile) result(self) + do_tile, & + init_global_data) result(self) use parallel_mod, only: go_decompose, on_master !$ use omp_lib, only : omp_get_max_threads implicit none @@ -252,6 +254,9 @@ function r2d_field_constructor(grid, & !> a single field should be allocated (which is not currently !> supported by PSyclone) logical, intent(in), optional :: do_tile + !> An optional global array with which the field in each domain + !> will be initialsed + real(go_wp), dimension(:,:), intent(in), optional :: init_global_data ! Local declarations type(r2d_field), target :: self logical :: local_do_tiling = .false. @@ -261,7 +266,7 @@ function r2d_field_constructor(grid, & !> The upper bounds actually used to allocate arrays (as opposed !! to the limits carried around with the field) integer :: upper_x_bound, upper_y_bound - integer :: itile, nthreads, ntilex, ntiley + integer :: itile, nthreads, ntilex, ntiley, dx, dy if (present(do_tile)) then local_do_tiling = do_tile @@ -349,7 +354,7 @@ function r2d_field_constructor(grid, & end if ! Since we're allocating the arrays to be larger than strictly - ! required we explicitly set all elements to -999 in case the code + ! required we explicitly set all elements to 0 in case the code ! does access 'out-of-bounds' elements during speculative ! execution. If we're running with OpenMP this also gives ! us the opportunity to do a 'first touch' policy to aid with @@ -368,6 +373,17 @@ function r2d_field_constructor(grid, & else self%data(:,:) = 0.0_go_wp end if + + if (present(init_global_data)) then + dx = grid%subdomain%global%xstart - self%internal%xstart + dy = grid%subdomain%global%ystart - self%internal%ystart + + do jj = grid%subdomain%internal%ystart, grid%subdomain%internal%ystop + do ji = grid%subdomain%internal%xstart, grid%subdomain%internal%xstop + self%data(ji, jj) = init_global_data(ji+dx, jj+dy) + end do + end do + end if end function r2d_field_constructor !=================================================== @@ -506,6 +522,8 @@ subroutine write_to_device(self, startx, starty, nx, ny, blocking) end subroutine write_to_device + !=================================================== + function get_data(self) result(dptr) !> Getter for the data associated with a field. If the data is on a ! device it ensures that the host copy is up-to-date with that on @@ -1287,6 +1305,74 @@ end function array_checksum !=================================================== + ! Collect a distributed array into a global array + subroutine gather_inner_data(self, global_data) + use parallel_utils_mod, only: get_num_ranks + use parallel_mod, only: on_master + + use MPI + !, only: MPI_COMM_WORLD, MPI_DOUBLE_PRECISION& + !, MPI_Comm_size, MPI_Barrier, MPI_GATHERV + implicit none + class(r2d_field), intent(in) :: self + real(go_wp), dimension(:,:), & + allocatable, intent(out) :: global_data + + real(go_wp), dimension(:), allocatable :: send_buffer, recv_buffer + + integer :: dx, dy, ji, jj, i, n, ierr, rank + integer :: x_start, x_stop, y_start, y_stop + + allocate(global_data(self%grid%global_nx, self%grid%global_ny)) + + ! No MPI (or single process), just copy the data out + if (get_num_ranks() == 1) then + ! Compute size of inner area only + dx = self%internal%xstart - 1 + dy = self%internal%ystart - 1 + do jj= self%internal%ystart, self%internal%ystop + do ji = self%internal%xstart, self%internal%xstop + global_data(ji-dx,jj-dy) = self%data(ji,jj) + end do + end do + return + endif + + n = self%grid%decomp%max_width *self%grid%decomp%max_height + allocate(send_buffer(n)) + i = 0 + do jj= self%internal%ystart, self%internal%ystop + do ji = self%internal%xstart, self%internal%xstop + i = i + 1 + send_buffer(i) = self%data(ji,jj) + end do + end do + + allocate(recv_buffer(get_num_ranks()*n)) + call MPI_Gather(send_buffer, n, MPI_DOUBLE_PRECISION, & + recv_buffer, n, MPI_DOUBLE_PRECISION, & + 0, MPI_COMM_WORLD, ierr) + + if (on_master()) then + do rank=1, get_num_ranks() + x_start = self%grid%decomp%subdomains(rank)%global%xstart + x_stop = self%grid%decomp%subdomains(rank)%global%xstop + y_start = self%grid%decomp%subdomains(rank)%global%ystart + y_stop = self%grid%decomp%subdomains(rank)%global%ystop + i = (rank-1) * n + do jj= y_start, y_stop + do ji = x_start, x_stop + i = i + 1 + global_data(ji, jj) = recv_buffer(i) + end do + end do + enddo + endif + + end subroutine gather_inner_data + + !=================================================== + subroutine init_periodic_bc_halos(fld) implicit none class(field_type), intent(inout) :: fld From 28a56472e6124d2c2034e84fe115f9be1cc38267 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Thu, 24 Feb 2022 16:45:49 +1100 Subject: [PATCH 03/20] #64 Updated documentation. --- doc/api.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/api.rst b/doc/api.rst index d05a0f6..f674198 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -125,11 +125,17 @@ constructor:: sshn_v = r2d_field(model_grid, GO_V_POINTS) sshn_t = r2d_field(model_grid, GO_T_POINTS) -The constructor takes two arguments: +The constructor takes two mandatory and two optional arguments: 1. The grid on which the field exists 2. The type of grid point at which the field is defined (``GO_U_POINTS``, ``GO_V_POINTS``, ``GO_T_POINTS`` or ``GO_F_POINTS``) + 3. ``do_tile``: If the field should be tiled among all threads, or if only + a single field should be allocated (which is not currently + supported by PSyclone). + 4. ``init_global_data``: a global 2D Fortran array, which must be + provided on each rank. On each rank the field will be initialised + with the data from the corresponding subdomain. Note that the grid object must have been fully configured (by a call to ``grid_init`` for instance) before it is passed into this From 319129f9ac1004df9b0115af1970542d69603966 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Fri, 25 Feb 2022 12:56:10 +1100 Subject: [PATCH 04/20] #64 Moved MPI into parallel_utils_mod, so that field is independent of MPI. --- finite_difference/src/field_mod.f90 | 15 +++++--------- .../src/parallel/parallel_utils_mod.f90 | 20 +++++++++++++++++-- .../src/parallel/parallel_utils_stub_mod.f90 | 14 ++++++++++++- 3 files changed, 36 insertions(+), 13 deletions(-) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index 37e5556..182487a 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -1305,15 +1305,12 @@ end function array_checksum !=================================================== - ! Collect a distributed array into a global array + !> Collect a (distributed) field into a global array + !! on the master node. subroutine gather_inner_data(self, global_data) - use parallel_utils_mod, only: get_num_ranks + use parallel_utils_mod, only: get_num_ranks, gather use parallel_mod, only: on_master - use MPI - !, only: MPI_COMM_WORLD, MPI_DOUBLE_PRECISION& - !, MPI_Comm_size, MPI_Barrier, MPI_GATHERV - implicit none class(r2d_field), intent(in) :: self real(go_wp), dimension(:,:), & allocatable, intent(out) :: global_data @@ -1340,6 +1337,7 @@ subroutine gather_inner_data(self, global_data) n = self%grid%decomp%max_width *self%grid%decomp%max_height allocate(send_buffer(n)) + allocate(recv_buffer(n*get_num_ranks())) i = 0 do jj= self%internal%ystart, self%internal%ystop do ji = self%internal%xstart, self%internal%xstop @@ -1348,10 +1346,7 @@ subroutine gather_inner_data(self, global_data) end do end do - allocate(recv_buffer(get_num_ranks()*n)) - call MPI_Gather(send_buffer, n, MPI_DOUBLE_PRECISION, & - recv_buffer, n, MPI_DOUBLE_PRECISION, & - 0, MPI_COMM_WORLD, ierr) + call gather(send_buffer, recv_buffer) if (on_master()) then do rank=1, get_num_ranks() diff --git a/finite_difference/src/parallel/parallel_utils_mod.f90 b/finite_difference/src/parallel/parallel_utils_mod.f90 index 4efa9bc..333853e 100644 --- a/finite_difference/src/parallel/parallel_utils_mod.f90 +++ b/finite_difference/src/parallel/parallel_utils_mod.f90 @@ -64,7 +64,7 @@ module parallel_utils_mod public parallel_init, parallel_finalise, parallel_abort, get_max_tag public get_rank, get_num_ranks, post_receive, post_send, global_sum - public msg_wait, msg_wait_all + public msg_wait, msg_wait_all, gather public MSG_UNDEFINED, MSG_REQUEST_NULL, DIST_MEM_ENABLED contains @@ -103,7 +103,6 @@ end subroutine parallel_finalise !! @param[in] msg Message to print - reason we're stopping subroutine parallel_abort(msg) use iso_fortran_env, only : error_unit ! access computing environment - implicit none character(len=*), intent(in) :: msg write(error_unit, *) msg @@ -238,4 +237,21 @@ subroutine global_sum(var) end subroutine global_sum + !================================================ + + subroutine gather(send_buffer, recv_buffer) + !> Gathers the data in the send buffer from all + !> processes into the receive buffer. + real(go_wp), dimension(:) :: send_buffer, recv_buffer + + integer :: ierr + integer :: n + + n = size(send_buffer) + call MPI_Gather(send_buffer, n, MPI_DOUBLE_PRECISION, & + recv_buffer, n, MPI_DOUBLE_PRECISION, & + 0, MPI_COMM_WORLD, ierr) + + end subroutine gather + end module parallel_utils_mod diff --git a/finite_difference/src/parallel/parallel_utils_stub_mod.f90 b/finite_difference/src/parallel/parallel_utils_stub_mod.f90 index cc3fc82..ad04d47 100644 --- a/finite_difference/src/parallel/parallel_utils_stub_mod.f90 +++ b/finite_difference/src/parallel/parallel_utils_stub_mod.f90 @@ -45,7 +45,7 @@ module parallel_utils_mod logical, parameter :: DIST_MEM_ENABLED = .False. public parallel_init, parallel_finalise, parallel_abort - public get_rank, get_num_ranks, get_max_tag + public get_rank, get_num_ranks, get_max_tag, gather public msg_wait, msg_wait_all, post_receive, post_send, global_sum public MSG_UNDEFINED, MSG_REQUEST_NULL, DIST_MEM_ENABLED @@ -149,4 +149,16 @@ subroutine global_sum(var) real(go_wp), intent(inout) :: var end subroutine global_sum + !================================================ + + subroutine gather(send_buffer, recv_buffer) + !> Gathers the data in the send buffer from all + !> processes into the receive buffer. + real(go_wp), dimension(:) :: send_buffer, recv_buffer + + recv_buffer = send_buffer + + end subroutine gather + + end module parallel_utils_mod From e2af613a2bdd2c894fd6ad2b58df8e86a9c9098b Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Fri, 25 Feb 2022 12:58:50 +1100 Subject: [PATCH 05/20] #64 Reduce buffer size to optimise communication, added more comments. --- finite_difference/src/field_mod.f90 | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index 182487a..a28c32e 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -1317,7 +1317,7 @@ subroutine gather_inner_data(self, global_data) real(go_wp), dimension(:), allocatable :: send_buffer, recv_buffer - integer :: dx, dy, ji, jj, i, n, ierr, rank + integer :: dx, dy, ji, jj, i, n, rank, halo_x, halo_y integer :: x_start, x_stop, y_start, y_stop allocate(global_data(self%grid%global_nx, self%grid%global_ny)) @@ -1335,9 +1335,17 @@ subroutine gather_inner_data(self, global_data) return endif - n = self%grid%decomp%max_width *self%grid%decomp%max_height + ! Determine maximum size of data to be sent. We don't need + ! to sent the halo, so reduce max_width and max_height by + ! 2*halo size. + halo_x = self%internal%xstart-1 + halo_y = self%internal%ystart-1 + n = (self%grid%decomp%max_width - 2*halo_x) * & + (self%grid%decomp%max_height - 2*halo_y) allocate(send_buffer(n)) allocate(recv_buffer(n*get_num_ranks())) + + ! Copy data into 1D send buffer. i = 0 do jj= self%internal%ystart, self%internal%ystop do ji = self%internal%xstart, self%internal%xstop @@ -1346,9 +1354,11 @@ subroutine gather_inner_data(self, global_data) end do end do + ! Collect all send_buffers on the master: call gather(send_buffer, recv_buffer) if (on_master()) then + ! Copy the data from each process into the global array do rank=1, get_num_ranks() x_start = self%grid%decomp%subdomains(rank)%global%xstart x_stop = self%grid%decomp%subdomains(rank)%global%xstop From 0f77d9472ddd4f64394d7700c52a328546484107 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 21 Aug 2023 21:32:13 +1000 Subject: [PATCH 06/20] #1623 Ignore build directory. --- doc/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/.gitignore diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 0000000..e35d885 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1 @@ +_build From 483a0a31b77f9e22fa0e323bac796a97a21412bc Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 23 Aug 2023 14:58:31 +1000 Subject: [PATCH 07/20] #64 Use only the .o files for the required library version (not all .o, which can result in mpi and non-mpi object files mixed in a non-mpi library). --- finite_difference/src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/finite_difference/src/Makefile b/finite_difference/src/Makefile index d9e3707..6baabb7 100644 --- a/finite_difference/src/Makefile +++ b/finite_difference/src/Makefile @@ -89,7 +89,7 @@ all: ${API_LIB} # Create the archive. ${API_LIB}: ${MODULES} - ${AR} ${ARFLAGS} ${API_LIB} *.o + ${AR} ${ARFLAGS} ${API_LIB} $(MODULES) install: ${MAKE} -C .. install From 129beaa611f34dc777c199a31d06b870fd4f863a Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Sun, 7 Jan 2024 11:14:36 +1100 Subject: [PATCH 08/20] #64 Added TODO back in. --- finite_difference/src/grid_mod.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 759e282..7a9fb57 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -393,6 +393,7 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) do ji = xstop+2, grid%nx grid%tmask(ji, :) = grid%tmask(xstop+1, :) end do + !> TODO add support for PBCs in parallel end if ! T-mask supplied ! For a regular, orthogonal mesh the spatial resolution is constant From 0bd09052c7af5f6a47ab2067622a04ebeb217f1d Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Sun, 7 Jan 2024 11:15:12 +1100 Subject: [PATCH 09/20] #64 Checked allocate return status. --- finite_difference/src/field_mod.f90 | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index a28c32e..a673452 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -1318,9 +1318,13 @@ subroutine gather_inner_data(self, global_data) real(go_wp), dimension(:), allocatable :: send_buffer, recv_buffer integer :: dx, dy, ji, jj, i, n, rank, halo_x, halo_y - integer :: x_start, x_stop, y_start, y_stop + integer :: x_start, x_stop, y_start, y_stop, ierr - allocate(global_data(self%grid%global_nx, self%grid%global_ny)) + allocate(global_data(self%grid%global_nx, self%grid%global_ny), & + stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate global result array') + end if ! No MPI (or single process), just copy the data out if (get_num_ranks() == 1) then @@ -1342,8 +1346,14 @@ subroutine gather_inner_data(self, global_data) halo_y = self%internal%ystart-1 n = (self%grid%decomp%max_width - 2*halo_x) * & (self%grid%decomp%max_height - 2*halo_y) - allocate(send_buffer(n)) - allocate(recv_buffer(n*get_num_ranks())) + allocate(send_buffer(n), stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate send buffer') + end if + allocate(recv_buffer(n*get_num_ranks()), stat=ierr) + if(ierr /= 0)then + call gocean_stop('gather_inner_data failed to allocate receive buffer') + end if ! Copy data into 1D send buffer. i = 0 From 35f556f1bee848153b6ebe8276ee5c1e600993e7 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 8 Jan 2024 13:36:04 +1100 Subject: [PATCH 10/20] #64 Added tests for new reduction and initialisation functionality. --- finite_difference/tests/dist_mem/Makefile | 20 +- .../tests/dist_mem/test_reduction.f90 | 194 ++++++++++++++++++ 2 files changed, 211 insertions(+), 3 deletions(-) create mode 100644 finite_difference/tests/dist_mem/test_reduction.f90 diff --git a/finite_difference/tests/dist_mem/Makefile b/finite_difference/tests/dist_mem/Makefile index a4e5a6c..b6b38d7 100644 --- a/finite_difference/tests/dist_mem/Makefile +++ b/finite_difference/tests/dist_mem/Makefile @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------ # BSD 2-Clause License # -# Copyright (c) 2019-2020, Science and Technology Facilities Council. +# Copyright (c) 2019-2024, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #------------------------------------------------------------------------------ # Author: A. R. Porter, STFC Daresbury Laboratory # Modified: S. Siso, STFC Daresbury Laboratory +# Modified: J. Henrichs, Bureau of Meteorology # Makefile for dist_mem tests for the dl_esm_inf finite difference library. # @@ -39,6 +40,12 @@ # export AR=ar # make # + +# Set some useful default values +F90 ?= mpif90 +MPIRUN ?= mpirun +F90FLAGS ?= -g + all: tests # How we launch MPI jobs @@ -49,10 +56,10 @@ INF_DIR=../.. INF_INC = ${INF_DIR}/src INF_LIB = ${INF_DIR}/src/lib_fd.a -.PHONY: test_xsplit test_ysplit test_xysplit inf_lib +.PHONY: test_xsplit test_ysplit test_xysplit test_reduction inf_lib # Compile and run tests. This is a very basic target. -tests: test_xsplit test_ysplit test_xysplit test_gsum +tests: test_xsplit test_ysplit test_xysplit test_gsum test_reduction test_xsplit: inf_lib test_halos.exe JPIGLO=10 JPJGLO=4 ${MPIRUN} -np 2 ./test_halos.exe @@ -68,6 +75,10 @@ test_gsum: inf_lib test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 4 ./test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 6 ./test_gsum.exe +test_reduction: inf_lib test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 4 ./test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 6 ./test_reduction.exe + inf_lib: ${MAKE} -C ${INF_DIR} F90="${F90}" dm_fd_lib @@ -75,6 +86,9 @@ inf_lib: test_halos.exe: inf_lib test_halos.o ${F90} -o $@ ${LDFLAGS} test_halos.o ${INF_LIB} +test_reduction.exe: inf_lib test_reduction.o + ${F90} -o $@ ${LDFLAGS} test_reduction.o ${INF_LIB} + test_gsum.exe: inf_lib test_gsum.o ${F90} -o $@ ${LDFLAGS} test_gsum.o ${INF_LIB} diff --git a/finite_difference/tests/dist_mem/test_reduction.f90 b/finite_difference/tests/dist_mem/test_reduction.f90 new file mode 100644 index 0000000..88c59e3 --- /dev/null +++ b/finite_difference/tests/dist_mem/test_reduction.f90 @@ -0,0 +1,194 @@ +!------------------------------------------------------------------------------ +! BSD 2-Clause License +! +! Copyright (c) 2019, Science and Technology Facilities Council +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +!------------------------------------------------------------------------------ +! Author: A. R. Porter, STFC Daresbury Laboratory + +!> Tests for the halo-exchange functionality of the dl_esm_inf library. +!! Domain size must be supplied via the JPIGLO and JPJGLO environment +!! variables. +program test_reduction + use kind_params_mod + use parallel_mod + use grid_mod + use field_mod + use gocean_mod + implicit none + ! Total size of the model domain + integer :: jpiglo = 0, jpjglo = 0 + ! (Uniform) grid resolution + real(go_wp) :: dx = 1.0 + real(go_wp) :: dy = 1.0 + !> The grid on which our fields are defined + type(grid_type), target :: model_grid + !> An example field + type(r2d_field) :: field + + real(go_wp), allocatable :: global_init_data(:,:) + real(go_wp), allocatable :: global_updated_data(:,:) + integer :: my_rank + integer :: ierr, i, j + character(len=10) :: env + + call get_environment_variable("JPIGLO", env) + read(env, '(i10)') jpiglo + call get_environment_variable("JPJGLO", env) + read(env, '(i10)') jpjglo + if(jpiglo < 1 .or. jpjglo < 1)then + stop 'Domain size must be set via $JPIGLO and $JPJGLO' + end if + + call gocean_initialise() + + !> Create our grid object + model_grid = grid_type(GO_ARAKAWA_C, & + (/GO_BC_EXTERNAL,GO_BC_EXTERNAL,GO_BC_NONE/), & + GO_OFFSET_NE) + + !> Generate a domain decomposition. This automatically uses the number + !! of MPI ranks available. + call model_grid%decompose(jpiglo, jpjglo) + my_rank = get_rank() + + if(my_rank == 1) then + write(*,"('Using global domain size of ',I4,'x',I4)") jpiglo, jpjglo + end if + + !> Create global array to hold the global initialisation data + allocate(global_init_data(jpiglo, jpjglo), stat=ierr) + if(ierr /= 0)then + call gocean_stop('Failed to allocate global init data') + end if + + do j=1, jpjglo + global_init_data(:,j) = (/ (unique_global_value(i, j, jpiglo), i=1, jpiglo) /) + enddo + + !> Complete the initialisation of the grid + call grid_init(model_grid, dx, dy) + + !> Create T point field + field = r2d_field(model_grid, GO_T_POINTS, & + init_global_data=global_init_data) + !> Check that the global data is set correctly in the local fields + call check_field_distribution (field) + + !> Update the local data + call update_field(field) + + !> This will allocate global_updated_data: + call field%gather_inner_data(global_updated_data) + + !> The result is only on rank 1: + if (my_rank == 1) then + call check_gathered_field(global_updated_data) + endif + + call gocean_finalise() + +contains + + !> Computes a unique value for each index pair (i,j) (1<=i,j) + !! given that there are n values in a row. It assigns the + !! numbers 0, 1, ..., n*(#rows)-1 to each cell: + function unique_global_value(i, j, n) + implicit none + integer, intent(in) :: i, j, n + integer :: unique_global_value + unique_global_value = (i-1) + (j-1)*n + end function unique_global_value + + ! ----------------------------------------------------------------------------------- + !> Checks that the local data in field corresponds to the original + !! global data: + subroutine check_field_distribution(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop, xmin, ymin + real(go_wp) :: global_value + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + xmin = field%grid%subdomain%global%xstart + ymin = field%grid%subdomain%global%ystart + + do jj = jstart, jstop + do ji = istart, istop + global_value = unique_global_value(xmin + ji-istart, & + ymin + jj-jstart, & + field%grid%decomp%global_nx) + if (field%data(ji, jj) /= global_value) then + print *,"Incorrect field data for ", ji, jj, field%data(ji, jj), global_value + call gocean_stop('Failed to distribute init data') + endif + + end do + end do + print *,"Field distributed correctly." + end subroutine check_field_distribution + + ! ----------------------------------------------------------------------------------- + !> Adds one to each entry in the local fiels + subroutine update_field(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + + do jj = jstart, jstop + do ji = istart, istop + field%data(ji,jj) = field%data(ji,jj) + 1 + end do + end do + end subroutine update_field + + !! ----------------------------------------------------------------------------------- + !> Checks if the global data gathered after updating the local fields is correct: + subroutine check_gathered_field(global_field) + real(go_wp), allocatable, dimension(:,:) :: global_field + integer :: ji, jj + real(go_wp) :: global_value + + do jj = 1, size(global_field, 2) + do ji = 1, size(global_field, 1) + global_value = unique_global_value(ji, jj, size(global_field, 1)) + 1 + if (global_field(ji, jj) /= global_value) then + print *, "Incorrect global field data gathered for ", ji, jj, & + global_field(ji, jj), global_value + !call gocean_stop('Failed to gather local data') + endif + + end do + end do + + print *,"Field gathered correctly." + end subroutine check_gathered_field + +end program test_reduction From 33f1abd58062eaa81f8c31522b4b6de572c5841d Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 8 Jan 2024 13:36:04 +1100 Subject: [PATCH 11/20] #64 Added tests for new reduction and initialisation functionality. --- finite_difference/tests/dist_mem/Makefile | 20 +- .../tests/dist_mem/test_reduction.f90 | 195 ++++++++++++++++++ 2 files changed, 212 insertions(+), 3 deletions(-) create mode 100644 finite_difference/tests/dist_mem/test_reduction.f90 diff --git a/finite_difference/tests/dist_mem/Makefile b/finite_difference/tests/dist_mem/Makefile index a4e5a6c..b6b38d7 100644 --- a/finite_difference/tests/dist_mem/Makefile +++ b/finite_difference/tests/dist_mem/Makefile @@ -1,7 +1,7 @@ #------------------------------------------------------------------------------ # BSD 2-Clause License # -# Copyright (c) 2019-2020, Science and Technology Facilities Council. +# Copyright (c) 2019-2024, Science and Technology Facilities Council. # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #------------------------------------------------------------------------------ # Author: A. R. Porter, STFC Daresbury Laboratory # Modified: S. Siso, STFC Daresbury Laboratory +# Modified: J. Henrichs, Bureau of Meteorology # Makefile for dist_mem tests for the dl_esm_inf finite difference library. # @@ -39,6 +40,12 @@ # export AR=ar # make # + +# Set some useful default values +F90 ?= mpif90 +MPIRUN ?= mpirun +F90FLAGS ?= -g + all: tests # How we launch MPI jobs @@ -49,10 +56,10 @@ INF_DIR=../.. INF_INC = ${INF_DIR}/src INF_LIB = ${INF_DIR}/src/lib_fd.a -.PHONY: test_xsplit test_ysplit test_xysplit inf_lib +.PHONY: test_xsplit test_ysplit test_xysplit test_reduction inf_lib # Compile and run tests. This is a very basic target. -tests: test_xsplit test_ysplit test_xysplit test_gsum +tests: test_xsplit test_ysplit test_xysplit test_gsum test_reduction test_xsplit: inf_lib test_halos.exe JPIGLO=10 JPJGLO=4 ${MPIRUN} -np 2 ./test_halos.exe @@ -68,6 +75,10 @@ test_gsum: inf_lib test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 4 ./test_gsum.exe JPIGLO=4 JPJGLO=10 ${MPIRUN} -np 6 ./test_gsum.exe +test_reduction: inf_lib test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 4 ./test_reduction.exe + JPIGLO=10 JPJGLO=10 ${MPIRUN} -np 6 ./test_reduction.exe + inf_lib: ${MAKE} -C ${INF_DIR} F90="${F90}" dm_fd_lib @@ -75,6 +86,9 @@ inf_lib: test_halos.exe: inf_lib test_halos.o ${F90} -o $@ ${LDFLAGS} test_halos.o ${INF_LIB} +test_reduction.exe: inf_lib test_reduction.o + ${F90} -o $@ ${LDFLAGS} test_reduction.o ${INF_LIB} + test_gsum.exe: inf_lib test_gsum.o ${F90} -o $@ ${LDFLAGS} test_gsum.o ${INF_LIB} diff --git a/finite_difference/tests/dist_mem/test_reduction.f90 b/finite_difference/tests/dist_mem/test_reduction.f90 new file mode 100644 index 0000000..672c55a --- /dev/null +++ b/finite_difference/tests/dist_mem/test_reduction.f90 @@ -0,0 +1,195 @@ +!------------------------------------------------------------------------------ +! BSD 2-Clause License +! +! Copyright (c) 2019, Science and Technology Facilities Council +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +!------------------------------------------------------------------------------ +! Author: J. Henrichs, Bureau of Meteorology +! Author: A. R. Porter, STFC Daresbury Laboratory + +!> Tests for the halo-exchange functionality of the dl_esm_inf library. +!! Domain size must be supplied via the JPIGLO and JPJGLO environment +!! variables. +program test_reduction + use kind_params_mod + use parallel_mod + use grid_mod + use field_mod + use gocean_mod + implicit none + ! Total size of the model domain + integer :: jpiglo = 0, jpjglo = 0 + ! (Uniform) grid resolution + real(go_wp) :: dx = 1.0 + real(go_wp) :: dy = 1.0 + !> The grid on which our fields are defined + type(grid_type), target :: model_grid + !> An example field + type(r2d_field) :: field + + real(go_wp), allocatable :: global_init_data(:,:) + real(go_wp), allocatable :: global_updated_data(:,:) + integer :: my_rank + integer :: ierr, i, j + character(len=10) :: env + + call get_environment_variable("JPIGLO", env) + read(env, '(i10)') jpiglo + call get_environment_variable("JPJGLO", env) + read(env, '(i10)') jpjglo + if(jpiglo < 1 .or. jpjglo < 1)then + stop 'Domain size must be set via $JPIGLO and $JPJGLO' + end if + + call gocean_initialise() + + !> Create our grid object + model_grid = grid_type(GO_ARAKAWA_C, & + (/GO_BC_EXTERNAL,GO_BC_EXTERNAL,GO_BC_NONE/), & + GO_OFFSET_NE) + + !> Generate a domain decomposition. This automatically uses the number + !! of MPI ranks available. + call model_grid%decompose(jpiglo, jpjglo) + my_rank = get_rank() + + if(my_rank == 1) then + write(*,"('Using global domain size of ',I4,'x',I4)") jpiglo, jpjglo + end if + + !> Create global array to hold the global initialisation data + allocate(global_init_data(jpiglo, jpjglo), stat=ierr) + if(ierr /= 0)then + call gocean_stop('Failed to allocate global init data') + end if + + do j=1, jpjglo + global_init_data(:,j) = (/ (unique_global_value(i, j, jpiglo), i=1, jpiglo) /) + enddo + + !> Complete the initialisation of the grid + call grid_init(model_grid, dx, dy) + + !> Create T point field + field = r2d_field(model_grid, GO_T_POINTS, & + init_global_data=global_init_data) + !> Check that the global data is set correctly in the local fields + call check_field_distribution (field) + + !> Update the local data + call update_field(field) + + !> This will allocate global_updated_data: + call field%gather_inner_data(global_updated_data) + + !> The result is only on rank 1: + if (my_rank == 1) then + call check_gathered_field(global_updated_data) + endif + + call gocean_finalise() + +contains + + !> Computes a unique value for each index pair (i,j) (1<=i,j) + !! given that there are n values in a row. It assigns the + !! numbers 0, 1, ..., n*(#rows)-1 to each cell: + function unique_global_value(i, j, n) + implicit none + integer, intent(in) :: i, j, n + integer :: unique_global_value + unique_global_value = (i-1) + (j-1)*n + end function unique_global_value + + ! ----------------------------------------------------------------------------------- + !> Checks that the local data in field corresponds to the original + !! global data: + subroutine check_field_distribution(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop, xmin, ymin + real(go_wp) :: global_value + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + xmin = field%grid%subdomain%global%xstart + ymin = field%grid%subdomain%global%ystart + + do jj = jstart, jstop + do ji = istart, istop + global_value = unique_global_value(xmin + ji-istart, & + ymin + jj-jstart, & + field%grid%decomp%global_nx) + if (field%data(ji, jj) /= global_value) then + print *,"Incorrect field data for ", ji, jj, field%data(ji, jj), global_value + call gocean_stop('Failed to distribute init data') + endif + + end do + end do + print *,"Field distributed correctly." + end subroutine check_field_distribution + + ! ----------------------------------------------------------------------------------- + !> Adds one to each entry in the local fiels + subroutine update_field(field) + type(r2d_field), intent(inout) :: field + integer :: ji, jj + integer :: istart, istop, jstart, jstop + istart = field%internal%xstart + istop = field%internal%xstop + jstart = field%internal%ystart + jstop = field%internal%ystop + + do jj = jstart, jstop + do ji = istart, istop + field%data(ji,jj) = field%data(ji,jj) + 1 + end do + end do + end subroutine update_field + + !! ----------------------------------------------------------------------------------- + !> Checks if the global data gathered after updating the local fields is correct: + subroutine check_gathered_field(global_field) + real(go_wp), allocatable, dimension(:,:) :: global_field + integer :: ji, jj + real(go_wp) :: global_value + + do jj = 1, size(global_field, 2) + do ji = 1, size(global_field, 1) + global_value = unique_global_value(ji, jj, size(global_field, 1)) + 1 + if (global_field(ji, jj) /= global_value) then + print *, "Incorrect global field data gathered for ", ji, jj, & + global_field(ji, jj), global_value + !call gocean_stop('Failed to gather local data') + endif + + end do + end do + + print *,"Field gathered correctly." + end subroutine check_gathered_field + +end program test_reduction From 86c442a66792b6bc55246a7950fe1daff31d0ee1 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 8 Jan 2024 13:42:04 +1100 Subject: [PATCH 12/20] #64 Improved documentation. --- doc/api.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index f674198..444fe5a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -133,9 +133,13 @@ The constructor takes two mandatory and two optional arguments: 3. ``do_tile``: If the field should be tiled among all threads, or if only a single field should be allocated (which is not currently supported by PSyclone). - 4. ``init_global_data``: a global 2D Fortran array, which must be + 4. ``init_global_data``: an optional global 2D Fortran array, which must be provided on each rank. On each rank the field will be initialised - with the data from the corresponding subdomain. + with the data from the corresponding subdomain. This is just a convenience + for users with a small problem size, since typically for large data sets + using a global array will create scalability problems. In general, it is + the responsibility of the user to initialise an array with the required + local data. Note that the grid object must have been fully configured (by a call to ``grid_init`` for instance) before it is passed into this From b6a727fd92334a11e32fd4ef54301b1379e28b07 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 29 Jan 2024 11:15:04 +1100 Subject: [PATCH 13/20] #64 omp-parallelise initialisation loop. --- finite_difference/src/field_mod.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index a673452..8a6c98f 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -378,6 +378,8 @@ function r2d_field_constructor(grid, & dx = grid%subdomain%global%xstart - self%internal%xstart dy = grid%subdomain%global%ystart - self%internal%ystart +!$OMP PARALLEL DO schedule(runtime), default(none), & +!$OMP private(ji,jj), shared(self, grid, init_global_data, dx, dy) do jj = grid%subdomain%internal%ystart, grid%subdomain%internal%ystop do ji = grid%subdomain%internal%xstart, grid%subdomain%internal%xstop self%data(ji, jj) = init_global_data(ji+dx, jj+dy) From be87b098a29636d064e9176da40c6638b0bb0595 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 29 Jan 2024 11:37:22 +1100 Subject: [PATCH 14/20] #64 Updated comments. --- finite_difference/tests/dist_mem/test_reduction.f90 | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/finite_difference/tests/dist_mem/test_reduction.f90 b/finite_difference/tests/dist_mem/test_reduction.f90 index 672c55a..f85a4c6 100644 --- a/finite_difference/tests/dist_mem/test_reduction.f90 +++ b/finite_difference/tests/dist_mem/test_reduction.f90 @@ -1,7 +1,7 @@ !------------------------------------------------------------------------------ ! BSD 2-Clause License ! -! Copyright (c) 2019, Science and Technology Facilities Council +! Copyright (c) 2024, Science and Technology Facilities Council ! All rights reserved. ! ! Redistribution and use in source and binary forms, with or without @@ -28,7 +28,7 @@ ! Author: J. Henrichs, Bureau of Meteorology ! Author: A. R. Porter, STFC Daresbury Laboratory -!> Tests for the halo-exchange functionality of the dl_esm_inf library. +!> Tests for the reduction functionality of the dl_esm_inf library. !! Domain size must be supplied via the JPIGLO and JPJGLO environment !! variables. program test_reduction @@ -183,8 +183,7 @@ subroutine check_gathered_field(global_field) if (global_field(ji, jj) /= global_value) then print *, "Incorrect global field data gathered for ", ji, jj, & global_field(ji, jj), global_value - !call gocean_stop('Failed to gather local data') - endif + endif end do end do From f3ce7f6723bd4298045743a75b6e600c885feebf Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Mon, 29 Jan 2024 13:24:20 +1100 Subject: [PATCH 15/20] #64 Added license to grid_mod, and updated years. --- finite_difference/src/field_mod.f90 | 1 + finite_difference/src/grid_mod.f90 | 30 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/finite_difference/src/field_mod.f90 b/finite_difference/src/field_mod.f90 index 8a6c98f..59e577d 100644 --- a/finite_difference/src/field_mod.f90 +++ b/finite_difference/src/field_mod.f90 @@ -26,6 +26,7 @@ ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. !------------------------------------------------------------------------------ ! Author: A. R. Porter, STFC Daresbury Laboratory +! Modified: J. Henrichs, Australian Bureau of Meteorology !> Module for describing all aspects of a field (which exists on some !! grid). diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 7a9fb57..fe67ce8 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -1,3 +1,33 @@ +!------------------------------------------------------------------------------ +! BSD 2-Clause License +! +! Copyright (c) 2017-2024, Science and Technology Facilities Council +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +!------------------------------------------------------------------------------ +! Author: A. R. Porter, STFC Daresbury Laboratory +! Modified: J. Henrichs, Australian Bureau of Meteorology + !> Module for describing and storing all information related to !! a finite-difference grid. module grid_mod From dd9eab12a27510eceff1257658d8cd6ad7fdd3c3 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 31 Jan 2024 11:58:56 +1100 Subject: [PATCH 16/20] #64 Abort early if PBC and no tmask is specified. Create dummy tmask if no tmask is specified. --- finite_difference/src/grid_mod.f90 | 38 +++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index fe67ce8..a59a2f5 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -387,15 +387,18 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) ystart = grid%subdomain%internal%ystart ystop = grid%subdomain%internal%ystop - ! Copy-in the externally-supplied T-mask, if any. If using OpenMP - ! then apply first-touch policy for data locality. + ! Even if no tmask is supplied, we will create one (with all wet points) + ! so that kernels that query the tmask will get useful values + allocate(grid%tmask(grid%nx, grid%ny), stat=ierr(1)) + if( ierr(1) /= 0 )then + call gocean_stop('grid_init: failed to allocate array for T mask') + end if + if( present(tmask) )then - allocate(grid%tmask(grid%nx, grid%ny), stat=ierr(1)) - if( ierr(1) /= 0 )then - call gocean_stop('grid_init: failed to allocate array for T mask') - end if -!> TODO should use thread tiling here but that is currently only set-up -!! inside a field object. + ! Copy-in the externally-supplied T-mask, if any. If using OpenMP + ! then apply first-touch policy for data locality. + !> TODO should use thread tiling here but that is currently only set-up + !! inside a field object. !$OMP PARALLEL DO schedule(runtime), default(none), private(ji,jj), & !$OMP shared(grid, tmask, ystart, ystop, xstart, xstop) do jj = ystart-1, ystop+1 @@ -424,6 +427,25 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) grid%tmask(ji, :) = grid%tmask(xstop+1, :) end do !> TODO add support for PBCs in parallel + else + ! No T-mask supplied. Check if grid has PBCs, which isn't + ! supported yet + if( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & + (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) then + call gocean_stop('grid_init: ERROR: Periodic boundary conditions ' & + & // 'are not yet supported.') + end if + ! Initialise an artificial all-wet tmask. If using OpenMP then apply + ! first-touch policy for data locality. + !$OMP PARALLEL DO schedule(runtime), default(none), private(ji,jj), & + !$OMP shared(grid, ystart, ystop, xstart, xstop) + do jj = ystart-1, ystop+1 + do ji = xstart-1, xstop+1 + grid%tmask(ji,jj) = 1 + end do + end do + !$OMP END PARALLEL DO + end if ! T-mask supplied ! For a regular, orthogonal mesh the spatial resolution is constant From a7d7479f833aa62b1a103d562ccb1b7a061f2cd4 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 31 Jan 2024 12:42:44 +1100 Subject: [PATCH 17/20] #64 Allow periodic boundary condition in non-distributed memory settings. --- finite_difference/src/grid_mod.f90 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index a59a2f5..6cf98aa 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -183,7 +183,8 @@ end function get_tmask subroutine decompose(self, domainx, domainy, & ndomains, ndomainx, ndomainy, & halo_width) - use parallel_mod, only: get_num_ranks, get_rank, go_decompose + use parallel_mod, only: get_num_ranks, get_num_ranks, get_rank, & + go_decompose implicit none class (grid_type), target, intent(inout) :: self !> Dimensions of the domain to decompose @@ -430,8 +431,9 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) else ! No T-mask supplied. Check if grid has PBCs, which isn't ! supported yet - if( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & - (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) then + if( (get_num_ranks() > 1) .and. & + ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & + (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) ) then call gocean_stop('grid_init: ERROR: Periodic boundary conditions ' & & // 'are not yet supported.') end if From 79bc75848f103c6d5fd34cd6076538734a7023d3 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 31 Jan 2024 12:46:04 +1100 Subject: [PATCH 18/20] #64 Updated comments. --- finite_difference/src/grid_mod.f90 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 6cf98aa..4a05d86 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -430,13 +430,15 @@ subroutine grid_init(grid, dxarg, dyarg, tmask) !> TODO add support for PBCs in parallel else ! No T-mask supplied. Check if grid has PBCs, which isn't - ! supported yet - if( (get_num_ranks() > 1) .and. & - ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & - (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) ) then + ! supported yet in case of distributed memory usage (i.e. + ! if there is more than one process) + if ( (get_num_ranks() > 1) .and. & + ( (grid%boundary_conditions(1) == GO_BC_PERIODIC) .or. & + (grid%boundary_conditions(2) == GO_BC_PERIODIC) ) ) then call gocean_stop('grid_init: ERROR: Periodic boundary conditions ' & & // 'are not yet supported.') end if + ! Initialise an artificial all-wet tmask. If using OpenMP then apply ! first-touch policy for data locality. !$OMP PARALLEL DO schedule(runtime), default(none), private(ji,jj), & From 685f19f905bd4345b7a3eac4bcf8d25343b2fa6c Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 31 Jan 2024 13:01:04 +1100 Subject: [PATCH 19/20] #64 Updatd documentation and comments. --- doc/api.rst | 14 ++++++++++++-- finite_difference/src/grid_mod.f90 | 10 ++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 444fe5a..22be503 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -50,12 +50,18 @@ Name Description =============== ========================================= GO_BC_NONE No boundary conditions are applied. GO_BC_EXTERNAL Some external forcing is applied. This must be implemented by a kernel. + The domain can be specified using a ``tmask``, but if no ``tmask`` is + specified, a dummy ``tmask`` is created that will define an all ocean + domain. GO_BC_PERIODIC Periodic boundary conditions are applied. =============== ========================================= The infrastructure requires this information in order to determine the extent of the model grid. +Note that at this stage ``GO_BC_PERIODIC`` is not supported when +using distributed memory. This is tracked in issue #54. + The index offset is required because a model (kernel) developer has choice in how they actually implement the staggering of variables on a grid. This comes down to a choice of which grid points in the vicinity @@ -98,8 +104,12 @@ object. This is done via a call to the ``grid_init`` subroutine:: !! wet (1), dry (0) or external (-1). integer, dimension(m,n), intent(in), optional :: tmask -It should be noted that currently only grids with constant -resolution in *x* and *y* are supported by this routine. + +If no T-mask is supplied then this routine configures the grid +appropriately for an all-wet domain by allocating a default +T-mask. It should also be noted that currently only grids with +constant resolution in *x* and *y* are supported by this routine. + .. _gocean1.0-fields: diff --git a/finite_difference/src/grid_mod.f90 b/finite_difference/src/grid_mod.f90 index 4a05d86..5cbdcc2 100644 --- a/finite_difference/src/grid_mod.f90 +++ b/finite_difference/src/grid_mod.f90 @@ -96,9 +96,9 @@ module grid_mod !! -1 == wet outside simulated region !! This is the key quantity that determines the region that !! is actually simulated. However, we also support the - !! specification of a model consisting entirely of wet points - !! Since this does not require a T-mask, we do not allocate - !! this array for that case. + !! specification of a model consisting entirely of wet points. + !! In this case a dummy tmask will be allocated, set to indicate + !! an all wet domain. integer, allocatable :: tmask(:,:) !> Pointer to tmask on remote device (if any) type(c_ptr) :: tmask_device @@ -324,7 +324,9 @@ end function grid_constructor !! @param[in] dxarg Grid spacing in x dimension !! @param[in] dyarg Grid spacing in y dimension !! @param[in] tmask Array holding the T-point mask which defines - !! the contents of the local domain. + !! the contents of the local domain. Need not be + !! supplied if domain is all wet, in which case a + !! dummy all-wet tmask will be created internally. subroutine grid_init(grid, dxarg, dyarg, tmask) use decomposition_mod, only: subdomain_type, decomposition_type use parallel_mod, only: map_comms, get_rank, get_num_ranks, on_master From 18fed02fec55c29e5cab5e7e221d18712470ce95 Mon Sep 17 00:00:00 2001 From: Joerg Henrichs Date: Wed, 31 Jan 2024 13:01:38 +1100 Subject: [PATCH 20/20] #64 Updated documentation conf.py file to remove warnings. --- doc/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 5d3c782..2624c93 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +# language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -89,7 +89,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. @@ -182,7 +182,7 @@ # -- Options for intersphinx extension --------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/', None)} # -- Options for todo extension ----------------------------------------------