Dissipation calculation, including a finite difference method on an irregular grid #4

Open · wants to merge 6 commits into base: master
python/main.py (17 additions, 0 deletions)

@@ -44,6 +44,8 @@
temp_field = np.zeros((config.nx_mpi(), config.ny_mpi(), config.nz_mpi()), dtype=np.float64)
dt_array = np.zeros(1)
time_array = np.zeros(1)
dissipation_rate_array = np.zeros(1)
energy_array = np.zeros(1)

#
# execute streams setup routines
@@ -78,6 +80,8 @@
span_average_dset = io_utils.VectorFieldXY2D(span_averages, [5, *span_average_shape], numwrites, "span_average", rank)
shear_stress_dset = io_utils.ScalarFieldX1D(span_averages, [config.grid.nx], numwrites, "shear_stress", rank)
span_average_time_dset = io_utils.Scalar0D(span_averages, [1], numwrites, "time", rank)
dissipation_rate_dset = io_utils.Scalar0D(span_averages, [1], numwrites, "dissipation_rate", rank)
energy_dset = io_utils.Scalar0D(span_averages, [1], numwrites, "energy", rank)

# trajectories files
dt_dset = io_utils.Scalar0D(trajectories, [1], config.temporal.num_iter, "dt", rank)
@@ -121,6 +125,19 @@
# write the time at which this data was collected
span_average_time_dset.write_array(time_array)

# calculate dissipation rate on GPU and store the result
streams.wrap_dissipation_calculation()
dissipation_rate_array[:] = streams.mod_streams.dissipation_rate
dissipation_rate_dset.write_array(dissipation_rate_array)
utils.hprint(f"dissipation is {dissipation_rate_array[0]}")

# calculate energy on GPU and store the result
streams.wrap_energy_calculation()
energy_array[:] = streams.mod_streams.energy
energy_dset.write_array(energy_array)
utils.hprint(f"energy is {energy_array[0]}")

# save dt information for every step
dt_array[:] = streams.mod_streams.dtglobal
dt_dset.write_array(dt_array)
src/Makefile (2 additions, 2 deletions)

@@ -54,7 +54,7 @@ OBJ_FILES = alloc.o bcdf.o bcextr.o bcfree.o bc.o bcrecyc.o bcrelax.o bcshk.o bc
setup.o solver.o startmpi.o stats.o step.o target_reystress.o tbforce.o updateghost.o utility.o utyibm.o \
visflx.o writedf.o writefield.o writefieldvtk.o writegridplot3d.o writerst.o \
writestatbl.o writestatchann.o writestat.o writestatzbl.o write_wallpressure.o visflx_stag.o \
write_probe_data.o bcblow.o
write_probe_data.o bcblow.o dissipation.o

PY_API_OBJS = min_api.o

@@ -67,7 +67,7 @@ STATIC_FILES = alloc.F90 bcdf.F90 bcextr.F90 bcfree.F90 bc.F90 bcrecyc.F90 bcrel
setup.F90 solver.F90 startmpi.F90 stats.F90 step.F90 target_reystress.F90 tbforce.F90 updateghost.F90 utility.F90 utyibm.F90 \
visflx.F90 writedf.F90 writefield.F90 writefieldvtk.F90 writegridplot3d.F90 writerst.F90 \
writestatbl.F90 writestatchann.F90 writestat.F90 writestatzbl.F90 write_wallpressure.F90 visflx_stag.F90 \
write_probe_data.F90 bcblow.F90
write_probe_data.F90 bcblow.F90 dissipation.F90

STATIC_MODS = mod_streams.F90 mod_sys.F90 euler.F90

src/alloc.F90 (5 additions, 0 deletions)

@@ -215,6 +215,11 @@ subroutine allocate_vars()
allocate(wrecycav_gpu(ng,ny,nv))

allocate(tauw_x(1:nx))

allocate(fdm_y_stencil_gpu(5, ny))
allocate(fdm_y_stencil(5, ny))
allocate(fdm_individual_stencil(0:1, 0:4, 0:4))
allocate(fdm_grid_points(0:4))
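! the arrays above feed dissipation.F90: five stencil coefficients per
! y row, Fornberg's delta(m, n, v) weight table (derivative order m = 0..1
! over points v = 0..4), and the five sample points alpha(0:4)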
!
endsubroutine allocate_vars

src/dissipation.F90 (new file, 315 additions)

@@ -0,0 +1,315 @@
! calculate the dissipation rate. See the `Calculated Quantities` section of
! the lab streams documentation for the equation being computed
!
! after calculation, the value is stored in `dissipation_rate` in mod_streams
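!
! a sketch of the discrete quantity computed below (as read from the code,
! not quoted from the docs):
!   dissipation_rate = (1 / (rlx * rly * rlz)) * sum over (i,j,k) of
!                      (dudx**2 + dvdy**2 + dwdz**2) * dx * dy * dz
! i.e. a volume average of the squared diagonal velocity gradients,
! summed across MPI ranks at the end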
subroutine dissipation_calculation()
use mod_streams

implicit none

integer :: i, j, k

real*8 :: dudx, dvdy, dwdz
real*8 :: dx, dy, dz
real*8 :: mpi_dissipation_sum

! dissipation_rate is defined in mod_streams; we reset its value here
dissipation_rate = 0.0

mpi_dissipation_sum = 0.0

! w_gpu(:, :, :, 1) -> rho
! w_gpu(:, :, :, 2) -> rho u
! w_gpu(:, :, :, 3) -> rho v
! w_gpu(:, :, :, 4) -> rho w
! w_gpu(:, :, :, 5) -> rho E

!$cuf kernel do(3) <<<*,*>>>
do k = 1,nz
do j = 1,ny
do i = 1,nx
!
! at i = 1 the centered spacing would give dx = 0, making the boundary
! values really wonky, so we fall back to a one-sided spacing there.
! the same happens for dy at j = 1 and for dz at k = 1
!
if (i == 1) then
dx = (x_gpu(i+1) - x_gpu(i)) / 2
else
dx = (x_gpu(i+1) - x_gpu(i-1)) / 2
endif

if (j == 1) then
dy = (y_gpu(j+1) - y_gpu(j)) / 2
else
dy = (y_gpu(j+1) - y_gpu(j-1)) / 2
endif

if (k == 1) then
dz = (z_gpu(k+1) - z_gpu(k)) / 2
else
dz = (z_gpu(k+1) - z_gpu(k-1)) / 2
endif

!
! du/dx
!
dudx = ( &
( &
-1 * w_gpu(i+2, j, k, 2) / w_gpu(i+2, j, k, 1) &
) &
+ &
( &
8 * w_gpu(i+1, j, k, 2) / w_gpu(i+1, j, k, 1) &
) &
- &
( &
8 * w_gpu(i-1, j, k, 2) / w_gpu(i-1, j, k, 1) &
) &
+ &
( &
w_gpu(i-2, j, k, 2) / w_gpu(i-2, j, k, 1) &
) &
) &
/ ( 12 * dx)
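
! note: dudx above and dwdz below use the standard uniform-spacing
! 4th-order central difference
!   f'(x) ~ (-f(x+2h) + 8 f(x+h) - 8 f(x-h) + f(x-2h)) / (12 h)
! while the y direction uses precomputed irregular-grid coefficients,
! presumably because the y grid is the non-uniform one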

!
! dv/dy
!
! the fdm_y_stencil_gpu coefficients are computed in the generate_full_fdm_stencil() subroutine
dvdy = &
(w_gpu(i, j, k, 3) / w_gpu(i, j, k, 1)) * fdm_y_stencil_gpu(1, j) + &
(w_gpu(i, j+1, k, 3) / w_gpu(i, j+1, k, 1)) * fdm_y_stencil_gpu(2, j) + &
(w_gpu(i, j-1, k, 3) / w_gpu(i, j-1, k, 1)) * fdm_y_stencil_gpu(3, j) + &
(w_gpu(i, j+2, k, 3) / w_gpu(i, j+2, k, 1)) * fdm_y_stencil_gpu(4, j) + &
(w_gpu(i, j-2, k, 3) / w_gpu(i, j-2, k, 1)) * fdm_y_stencil_gpu(5, j)
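
! the stencil row order (1..5) matches the point order used to fill
! fdm_grid_points in generate_full_fdm_stencil(): j, j+1, j-1, j+2, j-2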

!
! dw/dz
!
dwdz = ( &
( &
-1 * w_gpu(i, j, k+2, 4) / w_gpu(i, j, k+2, 1) &
) &
+ &
( &
8 * w_gpu(i, j, k+1, 4) / w_gpu(i, j, k+1, 1) &
) &
- &
( &
8 * w_gpu(i, j, k-1, 4) / w_gpu(i, j, k-1, 1) &
) &
+ &
( &
w_gpu(i, j, k-2, 4) / w_gpu(i, j, k-2, 1) &
) &
) &
/ ( 12 * dz)

dissipation_rate = dissipation_rate + &
((dudx**2 + dvdy**2 + dwdz**2) * dx * dy * dz)
enddo
enddo
enddo
!@cuf iercuda=cudaDeviceSynchronize()

dissipation_rate = dissipation_rate / (rlx * rly * rlz)

!write(*, *) "dissipation rate", dissipation_rate

! sum all the values across MPI, store the result in mpi_dissipation_sum
call MPI_REDUCE(dissipation_rate, mpi_dissipation_sum, 1, mpi_prec, MPI_SUM, 0, MPI_COMM_WORLD, iermpi)

! distribute mpi_dissipation_sum to all MPI procs
call MPI_BCAST(mpi_dissipation_sum, 1, mpi_prec, 0, MPI_COMM_WORLD, iermpi)

dissipation_rate = mpi_dissipation_sum
endsubroutine dissipation_calculation

subroutine generate_full_fdm_stencil()
use mod_streams, only: delta => fdm_individual_stencil, alpha => fdm_grid_points, &
ny, fdm_y_stencil, fdm_y_stencil_gpu, masterproc, y

implicit none

integer :: j, i
real*8 :: y0, ym1, ym2, yp1, yp2, total
total = 0.0

if (masterproc) write(*,*) "generating FDM stencil for dissipation"

do j = 1,ny
ym2 = y(j-2)
ym1 = y(j-1)
y0 = y(j)
yp1 = y(j+1)
yp2 = y(j+2)

! j
alpha(0) = y0
! j+1
alpha(1) = yp1
! j-1
alpha(2) = ym1
! j+2
alpha(3) = yp2
! j-2
alpha(4) = ym2

! use alpha to compute the stencil for this set of points
call irregular_grid_stencil()
fdm_y_stencil(:, j) = delta(1, 4, :)

do i = 1,5
total = total + abs(fdm_y_stencil(i, j))
enddo
enddo
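
! the signed coefficients of a first-derivative stencil should sum to
! (roughly) zero, so the running total of absolute values is just a
! nonzero diagnostic that is easy to compare across runs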

write(*,*) "total of stencil coefficients is", total

! copy over to GPU variable
#ifdef USE_CUDA
fdm_y_stencil_gpu = fdm_y_stencil
#else
call move_alloc(fdm_y_stencil, fdm_y_stencil_gpu)
#endif

endsubroutine

! fetches the grid points from `fdm_grid_points` and operates on `fdm_individual_stencil`
! to perform the algorithm from
! Generation of Finite Difference Formulas on Arbitrarily Spaced Grids (Fornberg, 1988)
! to generate a stencil that we can use for the finite difference calculation
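!
! the recurrences implemented below, in the paper's notation (delta is the
! weight table, alpha the grid points, x0 the expansion point):
!   delta(m, n, v) = ((alpha(n) - x0) * delta(m, n-1, v) - m * delta(m-1, n-1, v)) / (alpha(n) - alpha(v))
!   delta(m, n, n) = (c1 / c2) * (m * delta(m-1, n-1, n-1) - (alpha(n-1) - x0) * delta(m, n-1, n-1))
! starting from delta(0, 0, 0) = 1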
subroutine irregular_grid_stencil()
use mod_streams, only: delta => fdm_individual_stencil, alpha => fdm_grid_points

implicit none

integer :: M0, N0, v, m, n
real*8 :: c1, c2, c3, t1, t2, t3, x0

M0 = 1
N0 = 4

delta(:, :, :) = 0.0
delta(0, 0, 0) = 1.0
c1 = 1.0

x0 = alpha(0)

do n = 1,N0
c2 = 1.0

do v = 0,n-1
c3 = alpha(n) - alpha(v)
c2 = c2 * c3

do m = 0, min(n,M0)
t3 = delta(m, n-1, v)
t1 = (alpha(n) - x0) * t3

if (m == 0) then
t2 = 0.0
else
t2 = m * delta(m-1, n-1, v)
endif

delta(m, n, v) = (t1 - t2) / c3
enddo
enddo

do m = 0, min(n,M0)
if (m == 0) then
t1 = 0.0
else
t1 = m * delta(m-1, n-1, n-1)
endif

t3 = delta(m, n-1, n-1)
t2 = (alpha(n-1) - x0) * t3

delta(m, n, n) = (c1 / c2) * (t1 - t2)
enddo

c1 = c2
enddo

end subroutine

! sanity check: build the first-derivative stencil (4th-order accurate) on a uniform unit-spaced grid and print it
subroutine validate_irregular_fdm()
use mod_streams

implicit none

fdm_grid_points(0) = 0.0
fdm_grid_points(1) = 1.
fdm_grid_points(2) = -1.
fdm_grid_points(3) = 2.
fdm_grid_points(4) = -2.

call irregular_grid_stencil()
! should output
! -0.0 (0)
! 0.666667 (2/3)
! -0.666667 (-2/3)
! -0.0833333 (-1/12)
! 0.0833333 (1/12)
#ifndef USE_CUDA
! can only print in CPU mode
write(*,*) fdm_individual_stencil(1, 4, :)
#endif
endsubroutine

subroutine energy_calculation()
use mod_streams

implicit none

integer :: i, j, k

real*8 :: uu, vv, ww, rho
real*8 :: dx, dy, dz
real*8 :: mpi_energy_sum

! energy is defined in mod_streams; we reset its value here
energy = 0.0

mpi_energy_sum = 0.0

! w_gpu(:, :, :, 1) -> rho
! w_gpu(:, :, :, 2) -> rho u
! w_gpu(:, :, :, 3) -> rho v
! w_gpu(:, :, :, 4) -> rho w
! w_gpu(:, :, :, 5) -> rho E
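!
! a sketch of the discrete quantity accumulated below (as read from the
! code):
!   energy = 0.5 * sum over (i,j,k) of (uu**2 + vv**2 + ww**2) * dx*dy*dz
! i.e. volume-integrated kinetic energy without density weighting and,
! unlike dissipation_rate, without dividing by the domain volume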

!$cuf kernel do(3) <<<*,*>>>
do k = 1,nz
do j = 1,ny
do i = 1,nx
rho = w_gpu(i, j, k, 1)
uu = w_gpu(i, j, k, 2) / rho
vv = w_gpu(i, j, k, 3) / rho
ww = w_gpu(i, j, k, 4) / rho

dx = (x_gpu(i+1) - x_gpu(i-1)) / 2
dy = (y_gpu(j+1) - y_gpu(j-1)) / 2
dz = (z_gpu(k+1) - z_gpu(k-1)) / 2

energy = energy + &
(uu**2 + vv**2 + ww**2) * dx * dy * dz
enddo
enddo
enddo
!@cuf iercuda=cudaDeviceSynchronize()

energy = energy * 0.5

! sum all the values across MPI, store the result in mpi_energy_sum
call MPI_REDUCE(energy, mpi_energy_sum, 1, mpi_prec, MPI_SUM, 0, MPI_COMM_WORLD, iermpi)

! distribute mpi_energy_sum to all MPI procs
call MPI_BCAST(mpi_energy_sum, 1, mpi_prec, 0, MPI_COMM_WORLD, iermpi)

energy = mpi_energy_sum

end subroutine energy_calculation
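
For a quick cross-check of the stencil generation outside the Fortran build, here is a standalone NumPy sketch of the same Fornberg (1988) recurrence. fornberg_weights is a hypothetical helper written for this note, not part of the PR; on the uniform test points used by validate_irregular_fdm() it should reproduce the expected coefficients (0, 2/3, -2/3, -1/12, 1/12).

import numpy as np

def fornberg_weights(x0, alpha, max_order=1):
    # Fornberg (1988) recurrence: delta[m, n, v] is the weight of grid
    # point alpha[v] in the m-th-derivative formula built from alpha[0..n]
    npts = len(alpha) - 1
    delta = np.zeros((max_order + 1, npts + 1, npts + 1))
    delta[0, 0, 0] = 1.0
    c1 = 1.0
    for n in range(1, npts + 1):
        c2 = 1.0
        for v in range(n):
            c3 = alpha[n] - alpha[v]
            c2 *= c3
            for m in range(min(n, max_order) + 1):
                t2 = m * delta[m - 1, n - 1, v] if m > 0 else 0.0
                delta[m, n, v] = ((alpha[n] - x0) * delta[m, n - 1, v] - t2) / c3
        for m in range(min(n, max_order) + 1):
            t1 = m * delta[m - 1, n - 1, n - 1] if m > 0 else 0.0
            delta[m, n, n] = (c1 / c2) * (t1 - (alpha[n - 1] - x0) * delta[m, n - 1, n - 1])
        c1 = c2
    return delta

# same point ordering as the Fortran code: j, j+1, j-1, j+2, j-2
weights = fornberg_weights(0.0, [0.0, 1.0, -1.0, 2.0, -2.0])[1, 4, :]
print(weights)  # expect [0, 2/3, -2/3, -1/12, 1/12]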