Skip to content

Commit

Permalink
Make c2x/x2c reorder kernels available in CUDA backend.
Browse files (browse the repository at this point in the history)
  • Loading branch information
semi-h committed Mar 14, 2024
1 parent 2815bea commit ed88aa4
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/common.f90
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ module m_common
real(dp), parameter :: pi = 4*atan(1.0_dp)

integer, parameter :: RDR_X2Y = 12, RDR_X2Z = 13, RDR_Y2X = 21, &
- RDR_Y2Z = 23, RDR_Z2X = 31, RDR_Z2Y = 32
+ RDR_Y2Z = 23, RDR_Z2X = 31, RDR_Z2Y = 32, &
+ RDR_C2X = 41, RDR_X2C = 14

integer, parameter :: POISSON_SOLVER_FFT = 0, POISSON_SOLVER_CG = 1

Expand Down
11 changes: 10 additions & 1 deletion src/cuda/backend.f90
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ module m_cuda_backend
use m_allocator, only: allocator_t, field_t
use m_base_backend, only: base_backend_t
use m_common, only: dp, globs_t, &
- RDR_X2Y, RDR_X2Z, RDR_Y2X, RDR_Y2Z, RDR_Z2X, RDR_Z2Y
+ RDR_X2Y, RDR_X2Z, RDR_Y2X, RDR_Y2Z, RDR_Z2X, RDR_Z2Y, &
+ RDR_C2X, RDR_X2C
use m_poisson_fft, only: poisson_fft_t
use m_tdsops, only: dirps_t, tdsops_t

Expand Down Expand Up @@ -445,6 +446,14 @@ subroutine reorder_cuda(self, u_o, u_i, direction)
threads = dim3(SZ, SZ, 1)
call reorder_z2y<<<blocks, threads>>>(u_o_d, u_i_d, &
self%nx_loc, self%nz_loc)
+ case (RDR_C2X) ! c2x
+ blocks = dim3(self%nx_loc/SZ, self%ny_loc/SZ, self%nz_loc)
+ threads = dim3(SZ, SZ, 1)
+ call reorder_c2x<<<blocks, threads>>>(u_o_d, u_i_d, self%nz_loc)
+ case (RDR_X2C) ! x2c
+ blocks = dim3(self%nx_loc/SZ, self%ny_loc/SZ, self%nz_loc)
+ threads = dim3(SZ, SZ, 1)
+ call reorder_x2c<<<blocks, threads>>>(u_o_d, u_i_d, self%nz_loc)
case default
error stop 'Reorder direction is undefined.'
end select
Expand Down

0 comments on commit ed88aa4

Please sign in to comment.