From 90c1adff687146122a166ab999c796e62960e6cc Mon Sep 17 00:00:00 2001 From: VanillaBrooks Date: Thu, 10 Nov 2022 17:53:21 -0800 Subject: [PATCH 1/5] dissipation rate calculated in streams, with python bindings --- python/main.py | 3 ++ src/Makefile | 4 +- src/dissipation.F90 | 125 ++++++++++++++++++++++++++++++++++++++++++++ src/min_api.F90 | 8 +++ src/mod_streams.F90 | 6 +++ 5 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 src/dissipation.F90 diff --git a/python/main.py b/python/main.py index 9446cce..3ff1104 100755 --- a/python/main.py +++ b/python/main.py @@ -92,6 +92,9 @@ time += streams.mod_streams.dtglobal time_array[:] = time + streams.wrap_dissipation_calculation() + utils.hprint(f"dissipation rate is {streams.mod_streams.dissipation_rate}") + if (i % config.temporal.span_average_io_steps) == 0: utils.hprint("writing span average to output") streams.wrap_copy_gpu_to_cpu() diff --git a/src/Makefile b/src/Makefile index 8f6a3c4..11563bf 100644 --- a/src/Makefile +++ b/src/Makefile @@ -54,7 +54,7 @@ OBJ_FILES = alloc.o bcdf.o bcextr.o bcfree.o bc.o bcrecyc.o bcrelax.o bcshk.o bc setup.o solver.o startmpi.o stats.o step.o target_reystress.o tbforce.o updateghost.o utility.o utyibm.o \ visflx.o writedf.o writefield.o writefieldvtk.o writegridplot3d.o writerst.o \ writestatbl.o writestatchann.o writestat.o writestatzbl.o write_wallpressure.o visflx_stag.o \ - write_probe_data.o bcblow.o + write_probe_data.o bcblow.o dissipation.o PY_API_OBJS = min_api.o @@ -67,7 +67,7 @@ STATIC_FILES = alloc.F90 bcdf.F90 bcextr.F90 bcfree.F90 bc.F90 bcrecyc.F90 bcrel setup.F90 solver.F90 startmpi.F90 stats.F90 step.F90 target_reystress.F90 tbforce.F90 updateghost.F90 utility.F90 utyibm.F90 \ visflx.F90 writedf.F90 writefield.F90 writefieldvtk.F90 writegridplot3d.F90 writerst.F90 \ writestatbl.F90 writestatchann.F90 writestat.F90 writestatzbl.F90 write_wallpressure.F90 visflx_stag.F90 \ - write_probe_data.F90 bcblow.F90 + write_probe_data.F90 bcblow.F90 dissipation.F90 STATIC_MODS = mod_streams.F90 mod_sys.F90 euler.F90 diff --git a/src/dissipation.F90 b/src/dissipation.F90 new file mode 100644 index 0000000..95a41b3 --- /dev/null +++ b/src/dissipation.F90 @@ -0,0 +1,125 @@ +! calculate the dissipation rate. See lab streams documentation for +! `Calculated Quantities` for the equation being calculated +! +! after calculation, the value is stored in `dissipation_rate` in mod_streams +subroutine dissipation_calculation() + use mod_streams + + implicit none + + integer :: i, j, k + + real*8 :: uu, vv, ww, rho + real*8 :: dudx, dvdy, dwdz + real*8 :: dx, dy, dz + real*8 :: mpi_dissipation_sum + + ! dissipation_rate is defined in mod_streams, we reset its value here + dissipation_rate = 0.0 + + mpi_dissipation_sum = 0.0 + + ! w_gpu(:, :, :, 1) -> rho + ! w_gpu(:, :, :, 2) -> rho u + ! w_gpu(:, :, :, 3) -> rho v + ! w_gpu(:, :, :, 4) -> rho w + ! w_gpu(:, :, :, 5) -> rho E + + !$cuf kernel do(3) <<<*,*>>> + do k = 1,nz + do j = 1,ny + do i = 1,nx + + ! + ! du/dx + ! + dudx = ( & + ( & + -1 * w_gpu(i+2, j, k, 2) / w_gpu(i+2, j, k, 1) & + ) & + + & + ( & + 8 * w_gpu(i+1, j, k, 2) / w_gpu(i+1, j, k, 1) & + ) & + - & + ( & + 8 * w_gpu(i-1, j, k, 2) / w_gpu(i-1, j, k, 1) & + ) & + + & + ( & + w_gpu(i-2, j, k, 2) / w_gpu(i-2, j, k, 1) & + ) & + ) & + ! replace 12 * \Delta x by 6 * (2 \Delta x) since x can vary + ! with grid size (Without really looking at grid mesh generation + / ( 6 * ( x_gpu(i+1) - x_gpu(i-1))) + + ! + ! dv/dy + ! + dvdy = ( & + ( & + -1 * w_gpu(i, j+2, k, 3) / w_gpu(i, j+2, k, 1) & + ) & + + & + ( & + 8 * w_gpu(i, j+1, k, 3) / w_gpu(i, j+1, k, 1) & + ) & + - & + ( & + 8 * w_gpu(i, j-1, k, 3) / w_gpu(i, j-1, k, 1) & + ) & + + & + ( & + w_gpu(i, j-2, k, 3) / w_gpu(i, j-2, k, 1) & + ) & + ) & + ! replace 12 * \Delta y by 6 * (2 \Delta y) since y can vary + ! with grid size (Without really looking at grid mesh generation + / ( 6 * ( y_gpu(j+1) - y_gpu(j-1))) + + ! + ! dw/dz + ! + dwdz = ( & + ( & + -1 * w_gpu(i, j, k+2, 4) / w_gpu(i, j, k+2, 1) & + ) & + + & + ( & + 8 * w_gpu(i, j, k+1, 4) / w_gpu(i, j, k+1, 1) & + ) & + - & + ( & + 8 * w_gpu(i, j, k-1, 4) / w_gpu(i, j, k-1, 1) & + ) & + + & + ( & + w_gpu(i, j, k-2, 4) / w_gpu(i, j, k-2, 1) & + ) & + ) & + ! replace 12 * \Delta z by 6 * (2 \Delta z) since z can vary + ! with grid size (Without really looking at grid mesh generation + / ( 6 * ( z_gpu(k+1) - z_gpu(k-1))) + + dx = (x_gpu(i-1) + x_gpu(i+1)) / 2 + dy = (y_gpu(j-1) + y_gpu(j+1)) / 2 + dz = (z_gpu(k-1) + z_gpu(k+1)) / 2 + + dissipation_rate = dissipation_rate + & + (dudx**2 + dvdy**2 + dwdz**2) * dx * dy * dz + enddo + enddo + enddo + !@cuf iercuda=cudaDeviceSynchronize() + + dissipation_rate = dissipation_rate / (rlx * rly * rlz) + + ! sum all the values across MPI, store the result in mpi_dissipation_sum + call MPI_REDUCE(dissipation_rate, mpi_dissipation_sum, 1, mpi_prec, MPI_SUM, 0, MPI_COMM_WORLD, iermpi) + + ! distribute mpi_dissipation_sum to all MPI procs + call MPI_BCAST(mpi_dissipation_sum, 1, mpi_prec, 0, MPI_COMM_WORLD, iermpi) + + dissipation_rate = mpi_dissipation_sum +endsubroutine dissipation_calculation diff --git a/src/min_api.F90 b/src/min_api.F90 index 67b3922..78d6783 100644 --- a/src/min_api.F90 +++ b/src/min_api.F90 @@ -82,3 +82,11 @@ subroutine wrap_tauw_calculate() end if ! end subroutine + +! calculate dissipation rate. See inner subroutine documentation. +! after calculation, the value is stored in `dissipation_rate` in mod_streams +subroutine wrap_dissipation_calculation() + implicit none + + call dissipation_calculation() +end subroutine diff --git a/src/mod_streams.F90 b/src/mod_streams.F90 index 4f40ad1..a6ec0ea 100644 --- a/src/mod_streams.F90 +++ b/src/mod_streams.F90 @@ -81,6 +81,12 @@ module mod_streams ! boundary condition integer (see bc.f90) to indicate that the bottom boundary ! of the SBLI case should be blowing integer, parameter :: blowing_sbli_boundary_condition = 11 + + + ! dissipation_rate is calculated in dissipation.F90, subroutine dissipation_calculation +!f2py real*8 :: dissipation_rate + real(mykind) :: dissipation_rate + integer :: iflow integer :: idiski, ndim integer :: istore, istore_restart From a249c92ceafc9d6444dc50c73ab295c895298d18 Mon Sep 17 00:00:00 2001 From: VanillaBrooks Date: Thu, 10 Nov 2022 18:01:28 -0800 Subject: [PATCH 2/5] move dissipation rate to be exported with shear stress information --- python/main.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/main.py b/python/main.py index 3ff1104..8881d41 100755 --- a/python/main.py +++ b/python/main.py @@ -44,6 +44,7 @@ temp_field = np.zeros((config.nx_mpi(), config.ny_mpi(), config.nz_mpi()), dtype=np.float64) dt_array = np.zeros(1) time_array = np.zeros(1) +dissipation_rate_array = np.zeros(1) # # execute streams setup routines @@ -77,6 +78,7 @@ span_average_dset = io_utils.VectorFieldXY2D(span_averages, [5, *span_average_shape], numwrites, "span_average", rank) shear_stress_dset = io_utils.ScalarFieldX1D(span_averages, [config.grid.nx], numwrites, "shear_stress", rank) span_average_time_dset = io_utils.Scalar1D(span_averages, [1], numwrites, "time", rank) +dissipation_rate_dset = io_utils.Scalar1D(span_averages, [1], numwrites, "dissipation_rate", rank) # trajectories files dt_dset = io_utils.Scalar1D(trajectories, [1], config.temporal.num_iter, "dt", rank) @@ -92,9 +94,6 @@ time += streams.mod_streams.dtglobal time_array[:] = time - streams.wrap_dissipation_calculation() - utils.hprint(f"dissipation rate is {streams.mod_streams.dissipation_rate}") - if (i % config.temporal.span_average_io_steps) == 0: utils.hprint("writing span average to output") streams.wrap_copy_gpu_to_cpu() @@ -110,6 +109,11 @@ # write the time at which this data was collected span_average_time_dset.write_array(time_array) + # calculate dissipation rate on GPU and store the result + streams.wrap_dissipation_calculation() + dissipation_rate_array[:] = streams.mod_streams.dissipation_rate + dissipation_rate_dset.write_array(dissipation_rate_array) + # save dt information for every step dt_array[:] = streams.mod_streams.dtglobal dt_dset.write_array(dt_array) From 5c485b35fbcb7b2a1b29728779df50b38054b8a4 Mon Sep 17 00:00:00 2001 From: VanillaBrooks Date: Thu, 10 Nov 2022 18:20:20 -0800 Subject: [PATCH 3/5] energy gpu calculation and export to hdf5 --- python/main.py | 7 ++++++ src/dissipation.F90 | 54 +++++++++++++++++++++++++++++++++++++++++++++ src/min_api.F90 | 8 +++++++ src/mod_streams.F90 | 4 ++++ 4 files changed, 73 insertions(+) diff --git a/python/main.py b/python/main.py index 8881d41..474a601 100755 --- a/python/main.py +++ b/python/main.py @@ -45,6 +45,7 @@ dt_array = np.zeros(1) time_array = np.zeros(1) dissipation_rate_array = np.zeros(1) +energy_array = np.zeros(1) # # execute streams setup routines @@ -79,6 +80,7 @@ shear_stress_dset = io_utils.ScalarFieldX1D(span_averages, [config.grid.nx], numwrites, "shear_stress", rank) span_average_time_dset = io_utils.Scalar1D(span_averages, [1], numwrites, "time", rank) dissipation_rate_dset = io_utils.Scalar1D(span_averages, [1], numwrites, "dissipation_rate", rank) +energy_dset = io_utils.Scalar1D(span_averages, [1], numwrites, "energy", rank) # trajectories files dt_dset = io_utils.Scalar1D(trajectories, [1], config.temporal.num_iter, "dt", rank) @@ -114,6 +116,11 @@ dissipation_rate_array[:] = streams.mod_streams.dissipation_rate dissipation_rate_dset.write_array(dissipation_rate_array) + # calculate energy on GPU and store the result + streams.wrap_energy_calculation() + energy_array[:] = streams.mod_streams.energy + energy_dset.write_array(energy_array) + # save dt information for every step dt_array[:] = streams.mod_streams.dtglobal dt_dset.write_array(dt_array) diff --git a/src/dissipation.F90 b/src/dissipation.F90 index 95a41b3..6cf314c 100644 --- a/src/dissipation.F90 +++ b/src/dissipation.F90 @@ -123,3 +123,57 @@ subroutine dissipation_calculation() dissipation_rate = mpi_dissipation_sum endsubroutine dissipation_calculation + +subroutine energy_calculation() + use mod_streams + + implicit none + + integer :: i, j, k + + real*8 :: uu, vv, ww, rho + real*8 :: dudx, dvdy, dwdz + real*8 :: dx, dy, dz + real*8 :: mpi_energy_sum + + ! dissipation_rate is defined in mod_streams, we reset its value here + energy = 0.0 + + mpi_energy_sum = 0.0 + + ! w_gpu(:, :, :, 1) -> rho + ! w_gpu(:, :, :, 2) -> rho u + ! w_gpu(:, :, :, 3) -> rho v + ! w_gpu(:, :, :, 4) -> rho w + ! w_gpu(:, :, :, 5) -> rho E + + !$cuf kernel do(3) <<<*,*>>> + do k = 1,nz + do j = 1,ny + do i = 1,nx + rho = w_gpu(i, j, k, 1) + uu = w_gpu(i, j, k, 2) / rho + vv = w_gpu(i, j, k, 3) / rho + ww = w_gpu(i, j, k, 4) / rho + + dx = (x_gpu(i-1) + x_gpu(i+1)) / 2 + dy = (y_gpu(j-1) + y_gpu(j+1)) / 2 + dz = (z_gpu(k-1) + z_gpu(k+1)) / 2 + + energy = energy + & + (uu**2 + vv**2 + ww**2) * dx * dy * dz + enddo + enddo + enddo + + energy = energy * 0.5 + + ! sum all the values across MPI, store the result in mpi_energy_sum + call MPI_REDUCE(energy, mpi_energy_sum, 1, mpi_prec, MPI_SUM, 0, MPI_COMM_WORLD, iermpi) + + ! distribute mpi_energy_sum to all MPI procs + call MPI_BCAST(mpi_energy_sum, 1, mpi_prec, 0, MPI_COMM_WORLD, iermpi) + + energy = mpi_energy_sum + +end subroutine energy_calculation diff --git a/src/min_api.F90 b/src/min_api.F90 index 78d6783..969a933 100644 --- a/src/min_api.F90 +++ b/src/min_api.F90 @@ -90,3 +90,11 @@ subroutine wrap_dissipation_calculation() call dissipation_calculation() end subroutine + +! calculate energy. See inner subroutine documentation. +! after calculation, the value is stored in `energy` in mod_streams +subroutine wrap_energy_calculation() + implicit none + + call energy_calculation() +end subroutine diff --git a/src/mod_streams.F90 b/src/mod_streams.F90 index a6ec0ea..922f753 100644 --- a/src/mod_streams.F90 +++ b/src/mod_streams.F90 @@ -87,6 +87,10 @@ module mod_streams !f2py real*8 :: dissipation_rate real(mykind) :: dissipation_rate + ! energy is calculated in dissipation.F90, subroutine energy_calculation +!f2py real*8 :: energy + real(mykind) :: energy + integer :: iflow integer :: idiski, ndim integer :: istore, istore_restart From 627dedc424c2ee3aee9ff9d0241aee677370de94 Mon Sep 17 00:00:00 2001 From: VanillaBrooks Date: Mon, 14 Nov 2022 17:49:01 -0800 Subject: [PATCH 4/5] fix finite difference equations, still need irregular grid equations --- python/main.py | 3 +++ src/dissipation.F90 | 47 +++++++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/python/main.py b/python/main.py index 474a601..9cf634e 100755 --- a/python/main.py +++ b/python/main.py @@ -115,12 +115,15 @@ streams.wrap_dissipation_calculation() dissipation_rate_array[:] = streams.mod_streams.dissipation_rate dissipation_rate_dset.write_array(dissipation_rate_array) + utils.hprint(f"dissipation is {dissipation_rate_array[0]}") # calculate energy on GPU and store the result streams.wrap_energy_calculation() energy_array[:] = streams.mod_streams.energy energy_dset.write_array(energy_array) + utils.hprint(f"energy is {energy_array[0]}") + # save dt information for every step dt_array[:] = streams.mod_streams.dtglobal dt_dset.write_array(dt_array) diff --git a/src/dissipation.F90 b/src/dissipation.F90 index 6cf314c..0c1183d 100644 --- a/src/dissipation.F90 +++ b/src/dissipation.F90 @@ -29,6 +29,27 @@ subroutine dissipation_calculation() do k = 1,nz do j = 1,ny do i = 1,nx + ! + ! at i = 1 dx = 0 so the boundaries will be really wonky. + ! the same happens for dy at j = 1, and for dz at k = 1 + ! + if (i == 1) then + dx = (x_gpu(i+1) - x_gpu(i)) / 2 + else + dx = (x_gpu(i+1) - x_gpu(i-1)) / 2 + endif + + if (j == 1) then + dy = (y_gpu(j+1) - y_gpu(j)) / 2 + else + dy = (y_gpu(j+1) - y_gpu(j-1)) / 2 + endif + + if (k == 1) then + dz = (z_gpu(k+1) - z_gpu(k)) / 2 + else + dz = (z_gpu(k+1) - z_gpu(k-1)) / 2 + endif ! ! du/dx @@ -50,9 +71,7 @@ subroutine dissipation_calculation() w_gpu(i-2, j, k, 2) / w_gpu(i-2, j, k, 1) & ) & ) & - ! replace 12 * \Delta x by 6 * (2 \Delta x) since x can vary - ! with grid size (Without really looking at grid mesh generation - / ( 6 * ( x_gpu(i+1) - x_gpu(i-1))) + / ( 12 * dx) ! ! dv/dy @@ -74,9 +93,7 @@ subroutine dissipation_calculation() w_gpu(i, j-2, k, 3) / w_gpu(i, j-2, k, 1) & ) & ) & - ! replace 12 * \Delta y by 6 * (2 \Delta y) since y can vary - ! with grid size (Without really looking at grid mesh generation - / ( 6 * ( y_gpu(j+1) - y_gpu(j-1))) + / ( 12 * dy) ! ! dw/dz @@ -98,16 +115,10 @@ subroutine dissipation_calculation() w_gpu(i, j, k-2, 4) / w_gpu(i, j, k-2, 1) & ) & ) & - ! replace 12 * \Delta z by 6 * (2 \Delta z) since z can vary - ! with grid size (Without really looking at grid mesh generation - / ( 6 * ( z_gpu(k+1) - z_gpu(k-1))) - - dx = (x_gpu(i-1) + x_gpu(i+1)) / 2 - dy = (y_gpu(j-1) + y_gpu(j+1)) / 2 - dz = (z_gpu(k-1) + z_gpu(k+1)) / 2 + / ( 12 * dz) dissipation_rate = dissipation_rate + & - (dudx**2 + dvdy**2 + dwdz**2) * dx * dy * dz + ((dudx**2 + dvdy**2 + dwdz**2) * dx * dy * dz) enddo enddo enddo @@ -115,6 +126,8 @@ subroutine dissipation_calculation() dissipation_rate = dissipation_rate / (rlx * rly * rlz) + !write(*, *) "dissipation rate", dissipation_rate + ! sum all the values across MPI, store the result in mpi_dissipation_sum call MPI_REDUCE(dissipation_rate, mpi_dissipation_sum, 1, mpi_prec, MPI_SUM, 0, MPI_COMM_WORLD, iermpi) @@ -156,9 +169,9 @@ subroutine energy_calculation() vv = w_gpu(i, j, k, 3) / rho ww = w_gpu(i, j, k, 4) / rho - dx = (x_gpu(i-1) + x_gpu(i+1)) / 2 - dy = (y_gpu(j-1) + y_gpu(j+1)) / 2 - dz = (z_gpu(k-1) + z_gpu(k+1)) / 2 + dx = (x_gpu(i+1) - x_gpu(i-1)) / 2 + dy = (y_gpu(j+1) - y_gpu(j-1)) / 2 + dz = (z_gpu(k+1) - z_gpu(k-1)) / 2 energy = energy + & (uu**2 + vv**2 + ww**2) * dx * dy * dz From cbea0e77869f684129eec428f1f67b1b60af7006 Mon Sep 17 00:00:00 2001 From: VanillaBrooks Date: Fri, 18 Nov 2022 16:39:03 -0800 Subject: [PATCH 5/5] dissipation calculation in the y direction includes a FDM for irregular grids --- src/alloc.F90 | 5 ++ src/dissipation.F90 | 159 +++++++++++++++++++++++++++++++++++++++----- src/mod_streams.F90 | 12 ++++ src/setup.F90 | 1 + 4 files changed, 159 insertions(+), 18 deletions(-) diff --git a/src/alloc.F90 b/src/alloc.F90 index fce6196..d001422 100644 --- a/src/alloc.F90 +++ b/src/alloc.F90 @@ -215,6 +215,11 @@ subroutine allocate_vars() allocate(wrecycav_gpu(ng,ny,nv)) allocate(tauw_x(1:nx)) + + allocate(fdm_y_stencil_gpu(5, ny)) + allocate(fdm_y_stencil(5, ny)) + allocate(fdm_individual_stencil(0:1, 0:4, 0:4)) + allocate(fdm_grid_points(0:4)) ! endsubroutine allocate_vars diff --git a/src/dissipation.F90 b/src/dissipation.F90 index 0c1183d..624ab06 100644 --- a/src/dissipation.F90 +++ b/src/dissipation.F90 @@ -76,24 +76,13 @@ subroutine dissipation_calculation() ! ! dv/dy ! - dvdy = ( & - ( & - -1 * w_gpu(i, j+2, k, 3) / w_gpu(i, j+2, k, 1) & - ) & - + & - ( & - 8 * w_gpu(i, j+1, k, 3) / w_gpu(i, j+1, k, 1) & - ) & - - & - ( & - 8 * w_gpu(i, j-1, k, 3) / w_gpu(i, j-1, k, 1) & - ) & - + & - ( & - w_gpu(i, j-2, k, 3) / w_gpu(i, j-2, k, 1) & - ) & - ) & - / ( 12 * dy) + ! fdm_y_stencil_gpu coeffs are calculated in generate_full_fdm_stencil() subroutine + dvdy = & + (w_gpu(i, j, k, 3) / w_gpu(i, j, k, 1)) * fdm_y_stencil_gpu(1, j) + & + (w_gpu(i, j+1, k, 3) / w_gpu(i, j+1, k, 1)) * fdm_y_stencil_gpu(2, j) + & + (w_gpu(i, j-1, k, 3) / w_gpu(i, j-1, k, 1)) * fdm_y_stencil_gpu(3, j) + & + (w_gpu(i, j+2, k, 3) / w_gpu(i, j+2, k, 1)) * fdm_y_stencil_gpu(4, j) + & + (w_gpu(i, j-2, k, 3) / w_gpu(i, j-2, k, 1)) * fdm_y_stencil_gpu(5, j) ! ! dw/dz @@ -137,6 +126,140 @@ subroutine dissipation_calculation() dissipation_rate = mpi_dissipation_sum endsubroutine dissipation_calculation +subroutine generate_full_fdm_stencil() + use mod_streams, only: delta => fdm_individual_stencil, alpha => fdm_grid_points, & + ny, fdm_y_stencil, fdm_y_stencil_gpu, masterproc, y + + implicit none + + integer :: j, i + real*8 :: y0, ym1, ym2, yp1, yp2, total + total = 0.0 + + if (masterproc) write(*,*) "generating FDM stencil for dissipation" + + do j = 1,ny + ym2 = y(j-2) + ym1 = y(j-1) + y0 = y(j) + yp1 = y(j+1) + yp2 = y(j+2) + + ! j + alpha(0) = y0 + ! j+1 + alpha(1) = yp1 + ! j-1 + alpha(2) = ym1 + ! j+2 + alpha(3) = yp2 + ! j-2 + alpha(4) = ym2 + + ! use alpha to compute the stencil for this set of points + call irregular_grid_stencil() + fdm_y_stencil(:, j) = delta(1, 4, :) + + do i = 1,5 + total = total + abs(fdm_y_stencil(i, j)) + enddo + enddo + + write(*,*) "total of stencil coefficients is", total + +! copy over to GPU variable +#ifdef USE_CUDA + fdm_y_stencil_gpu = fdm_y_stencil +#else + call move_alloc(fdm_y_stencil, fdm_y_stencil_gpu) +#endif + +endsubroutine + +! fetches the grid points from `fdm_grid_points` and operates on `fdm_individual_stencil_gpu` +! to perform the algorithm from +! Generation of Finite Difference Formulas on Arbitrarily Spaced Grids (Fornberg, 1988) +! to generate a stencil that we can use for the finite difference calculation +subroutine irregular_grid_stencil() + use mod_streams, only: delta => fdm_individual_stencil, alpha => fdm_grid_points + + implicit none + + integer :: M0, N0, v, m, n + real*8 :: c1, c2, c3, t1, t2, t3, x0 + + M0 = 1 + N0 = 4 + + delta(:, :, :) = 0.0 + delta(0, 0, 0) = 1.0 + c1 = 1.0 + + x0 = alpha(0) + + do n = 1,N0 + c2 = 1.0 + + do v = 0,n-1 + c3 = alpha(n) - alpha(v) + c2 = c2 * c3 + + do m = 0, min(n,M0) + t3 = delta(m, n-1, v) + t1 = (alpha(n) - x0) * t3 + + if (m == 0) then + t2 = 0.0 + else + t2 = m * delta(m-1, n-1, v) + endif + + delta(m, n, v) = (t1 -t2) / c3 + enddo + enddo + + do m = 0, min(n,M0) + if (m == 0) then + t1 = 0.0 + else + t1 = m * delta(m-1, n-1, n-1) + endif + + t3 = delta(m, n-1, n-1) + t2 = (alpha(n-1) - x0) * t3 + + delta(m, n, n) = (c1 / c2) * (t1 - t2) + enddo + + c1 = c2 + enddo + +end subroutine + +! calculate the first order derivative with 4th order error for a simple grid +subroutine validate_irregular_fdm() + use mod_streams + + fdm_grid_points(0) = 0.0 + fdm_grid_points(1) = 1. + fdm_grid_points(2) = -1. + fdm_grid_points(3) = 2. + fdm_grid_points(4) = -2. + + call irregular_grid_stencil() + ! should output + ! -0.0 (0) + ! 0.666667 (2/3) + ! -0.666667 (-2/3) + ! -0.0833333 (-1/12) + ! 0.0833333 (1/12) +#ifdef USE_CUDA +#else + ! can only print in CPU mode + write(*,*) fdm_individual_stencil(1, 4, :) +#endif +endsubroutine + subroutine energy_calculation() use mod_streams diff --git a/src/mod_streams.F90 b/src/mod_streams.F90 index deffec2..83ff3ac 100644 --- a/src/mod_streams.F90 +++ b/src/mod_streams.F90 @@ -87,6 +87,16 @@ module mod_streams !f2py real*8 :: dissipation_rate real(mykind) :: dissipation_rate + ! stencil for computing finite differences on the irregular y mesh +!f2py real*8, dimension(:, :), allocatable :: fdm_y_stencil + real(mykind), dimension(:, :), allocatable :: fdm_y_stencil + real(mykind), dimension(:, :), allocatable :: fdm_y_stencil_gpu + ! the array we can re-use for calculating a 4th order accurate approximation for the first derivative + ! according to + ! Generation of Finite Difference Formulas on Arbitrarily Spaced Grids (Fornberg, 1988) + real(mykind), dimension(:, :, :), allocatable :: fdm_individual_stencil + real(mykind), dimension(:), allocatable :: fdm_grid_points + ! energy is calculated in dissipation.F90, subroutine energy_calculation !f2py real*8 :: energy real(mykind) :: energy @@ -241,6 +251,8 @@ module mod_streams attributes(device) :: fhat_trans_gpu, fl_trans_gpu attributes(device) :: temperature_trans_gpu attributes(device) :: wv_gpu, wv_trans_gpu + + attributes(device) :: fdm_y_stencil_gpu ! ! TODO: this might be problematic !f2py intent(hide) :: local_comm, mydev diff --git a/src/setup.F90 b/src/setup.F90 index 4a27395..4da8d88 100644 --- a/src/setup.F90 +++ b/src/setup.F90 @@ -30,6 +30,7 @@ subroutine setup if (masterproc) write(*,*) 'Initialize field' call init() call checkdt() + call generate_full_fdm_stencil() !=================================================== ! end subroutine setup