-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
FFT for OMP backend (via 2decomp&fft) #113
base: main
Are you sure you want to change the base?
Changes from all commits
00b8780
00630df
afcd22c
8a513a3
c0fb05a
9c113a4
2e6f86c
8d1b528
eb3883b
9b9bc35
8132b2b
7901035
af250cc
31a2035
fc19718
151eea7
c0649d2
3bd7540
56f2b44
c1637a1
01403a9
ea2218e
b75fde0
846fffc
51838dc
8a71c8f
b36cb75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ on: | |
|
||
jobs: | ||
unit-tests: | ||
runs-on: ubuntu-20.04 | ||
runs-on: ubuntu-22.04 | ||
env: | ||
OMPI_VERSION: 4.1.5 | ||
steps: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Locate the 2decomp-fft library (CMake package name: decomp2d).
#
# A copy already built under <source>/decomp2d/build is tried first. If that
# lookup fails, a helper project generated from
# cmake/decomp2d/downloadBuild2decomp.cmake.in is configured and built at
# configure time, and the package is then looked up again (REQUIRED this
# time) in the helper project's build tree.
find_package(decomp2d PATHS ${CMAKE_SOURCE_DIR}/decomp2d/build)

if(NOT decomp2d_FOUND)
  message(STATUS "2decomp-fft PATH not available we'll try to download and install")

  # Instantiate the helper project's CMakeLists.txt from the template.
  configure_file(
    ${CMAKE_SOURCE_DIR}/cmake/decomp2d/downloadBuild2decomp.cmake.in
    decomp2d-build/CMakeLists.txt
  )

  # Configure the helper project with the same generator as this build.
  execute_process(
    COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/decomp2d-build
    RESULT_VARIABLE result
  )
  if(NOT result)
    message("CMake step for 2decomp-fft completed (${result}).")
  else()
    message(FATAL_ERROR "CMake step for 2decomp-fft failed: ${result}")
  endif()

  # Build the helper project; a non-zero exit status aborts configuration.
  execute_process(
    COMMAND ${CMAKE_COMMAND} --build .
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/decomp2d-build
    RESULT_VARIABLE result
  )
  if(result)
    message(FATAL_ERROR "Build step for 2decomp-fft failed: ${result}")
  endif()

  # Search the ExternalProject build directory of the helper project.
  set(D2D_ROOT
    ${CMAKE_CURRENT_BINARY_DIR}/decomp2d-build/downloadBuild2decomp-prefix/src/downloadBuild2decomp-build
  )
  find_package(decomp2d REQUIRED PATHS ${D2D_ROOT})
else()
  message(STATUS "2decomp-fft FOUND")
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# downloadBuild2decomp.cmake.in
#
# Template for a stand-alone helper project that downloads and builds
# 2decomp-fft through ExternalProject. It is instantiated with
# configure_file() (without @ONLY), so the ${...} references below are
# expanded when the template is processed, not when the helper project runs.
#
# The minimum version is 3.10: CMake 4.x refuses to configure projects that
# request compatibility with versions older than 3.5, and recent 3.x releases
# emit deprecation warnings for anything below 3.10.
cmake_minimum_required(VERSION 3.10)

# No compiled language is needed here; the project only drives ExternalProject.
project(downloadBuild2decomp NONE)

include(ExternalProject)

# Fetch the pinned 2decomp-fft release and build it with its default
# configure/build/install steps; the test step is disabled.
ExternalProject_Add(downloadBuild2decomp
  GIT_REPOSITORY "https://github.com/xcompact3d/2decomp-fft"
  GIT_TAG        "v2.0.3"
  SOURCE_DIR     "${CMAKE_CURRENT_BINARY_DIR}/decomp2d-src"
  INSTALL_DIR    "${CMAKE_CURRENT_BINARY_DIR}/decomp2d-opt"
  TEST_COMMAND   ""
)
|
||
#ExternalProject_Add(downloadBuild2decomp | ||
# GIT_REPOSITORY "https://github.com/xcompact3d/2decomp-fft" | ||
# GIT_TAG "main" | ||
# CONFIGURE_COMMAND "cmake -S ${CMAKE_CURRENT_BINARY_DIR}/decomp2d-src " | ||
# BUILD_COMMAND "" | ||
# INSTALL_COMMAND "" | ||
# TEST_COMMAND "" | ||
# SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/decomp2d-src" | ||
# BINARY_DIR "" | ||
# INSTALL_DIR "" | ||
Comment on lines
+17
to
+26
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove commented code? |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,113 @@ | ||||||
submodule(m_decomp) m_decomp_2decompfft | ||||||
!! Parallel decomposition provided by 2decomp&FFT | ||||||
|
||||||
use mpi | ||||||
implicit none | ||||||
|
||||||
type, extends(decomp_t) :: decomp_2decompfft_t | ||||||
contains | ||||||
procedure :: decomposition => decomposition_2decompfft | ||||||
end type | ||||||
|
||||||
contains | ||||||
|
||||||
module subroutine init_decomp(decomp)
  !! Allocate the polymorphic decomposition object, giving it the
  !! 2decomp&fft-backed dynamic type `decomp_2decompfft_t`.
  class(decomp_t), allocatable, intent(out) :: decomp

  allocate (decomp_2decompfft_t :: decomp)
end subroutine init_decomp
|
||||||
module subroutine decomposition_2decompfft(self, grid, par) | ||||||
!! Performs 2D mesh decomposition using 2decomp&fft | ||||||
use m_mesh_content, only: par_t, grid_t | ||||||
use decomp_2d, only: decomp_2d_init, DECOMP_2D_COMM_CART_X, xsize, xstart | ||||||
use decomp_2d_mpi, only: nrank, nproc | ||||||
|
||||||
class(decomp_2decompfft_t) :: self | ||||||
class(grid_t), intent(inout) :: grid | ||||||
class(par_t), intent(inout) :: par | ||||||
integer :: p_col, p_row | ||||||
integer, allocatable, dimension(:, :, :) :: global_ranks | ||||||
integer, allocatable, dimension(:) :: global_ranks_lin | ||||||
integer :: nproc | ||||||
integer, dimension(3) :: subd_pos, subd_pos_prev, subd_pos_next | ||||||
logical, dimension(3) :: periodic_bc | ||||||
integer :: dir | ||||||
logical :: is_last_domain | ||||||
integer :: nx, ny, nz | ||||||
integer :: ierr | ||||||
integer :: cart_rank | ||||||
integer, dimension(2) :: coords | ||||||
|
||||||
if (par%is_root()) then | ||||||
print*, "Domain decomposition by 2decomp&fft" | ||||||
end if | ||||||
nrank = par%nrank | ||||||
nproc = par%nproc | ||||||
|
||||||
nx = grid%global_cell_dims(1) | ||||||
ny = grid%global_cell_dims(2) | ||||||
nz = grid%global_cell_dims(3) | ||||||
|
||||||
p_row = par%nproc_dir(2) | ||||||
p_col = par%nproc_dir(3) | ||||||
if (p_row*p_col /= par%nproc) then | ||||||
error stop "Decomposition in X not supported by 2decomp&fft backend" | ||||||
end if | ||||||
periodic_bc(:) = grid%periodic_BC(:) | ||||||
call decomp_2d_init(nx, ny, nz, p_row, p_col, periodic_bc) | ||||||
|
||||||
! Get global_ranks | ||||||
allocate(global_ranks(1, p_row, p_col)) | ||||||
allocate(global_ranks_lin(p_row*p_col)) | ||||||
global_ranks_lin(:) = 0 | ||||||
|
||||||
call MPI_Comm_rank(DECOMP_2D_COMM_CART_X, cart_rank, ierr) | ||||||
call MPI_Cart_coords(DECOMP_2D_COMM_CART_X, cart_rank, 2, coords, ierr) | ||||||
|
||||||
global_ranks_lin(coords(1)+1 + p_row*(coords(2))) = par%nrank | ||||||
|
||||||
call MPI_Allreduce(MPI_IN_PLACE, global_ranks_lin, p_row*p_col, MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD, ierr) | ||||||
|
||||||
global_ranks = reshape(global_ranks_lin, shape=[1, p_row, p_col]) | ||||||
Comment on lines
+52
to
+72
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This bit is the only difference in the decomposition strategy we need to sort out when using 2DECOMP&FFT. The custom strategy we follow is very straightforward as in Lines 226 to 227 in 9c39613
All we need from 2DECOMP&FFT is the rank mapping so that we can use it instead of our custom rank mapping, and this can be obtained after instantiating the library with the global grid dimensions, the decomposition the user sets in the input file, and the periodicity in BCs. Because all these prerequisites to instantiate 2DECOMP&FFT are known from the very beginning of the program (from the input file), there is a possibility that we instantiate the library somewhere else, before the mesh class is instantiated. Instantiating 2DECOMP&FFT outside of the mesh class would make things easier, because instead of working out the rank mapping inside the mesh class, which requires a relatively complex structure, we can pass the rank mapping as an input argument and then here in the highlighted bit use this simple input array to carry on with all we do in the mesh class. I think this would simplify the structure quite a lot, what do you think? |
||||||
|
||||||
! subdomain position in the global domain | ||||||
subd_pos = findloc(global_ranks, par%nrank) | ||||||
|
||||||
! local/directional position of the subdomain | ||||||
par%nrank_dir(:) = subd_pos(:) - 1 | ||||||
|
||||||
! Get local domain size and offset from 2decomp | ||||||
grid%cell_dims(:) = xsize(:) | ||||||
par%n_offset(:) = xstart(:) | ||||||
|
||||||
! compute vert_dims from cell_dims | ||||||
do dir = 1, 3 | ||||||
is_last_domain = (par%nrank_dir(dir) + 1 == par%nproc_dir(dir)) | ||||||
if (is_last_domain .and. (.not. grid%periodic_BC(dir))) then | ||||||
grid%vert_dims(dir) = grid%cell_dims(dir) +1 | ||||||
else | ||||||
grid%vert_dims(dir) = grid%cell_dims(dir) | ||||||
end if | ||||||
end do | ||||||
|
||||||
! Get neighbour ranks | ||||||
do dir = 1, 3 | ||||||
nproc = par%nproc_dir(dir) | ||||||
subd_pos_prev(:) = subd_pos(:) | ||||||
subd_pos_prev(dir) = modulo(subd_pos(dir) - 2, nproc) + 1 | ||||||
par%pprev(dir) = global_ranks(subd_pos_prev(1), & | ||||||
subd_pos_prev(2), & | ||||||
subd_pos_prev(3)) | ||||||
|
||||||
subd_pos_next(:) = subd_pos(:) | ||||||
subd_pos_next(dir) = modulo(subd_pos(dir) - nproc, nproc) + 1 | ||||||
par%pnext(dir) = global_ranks(subd_pos_next(1), & | ||||||
subd_pos_next(2), & | ||||||
subd_pos_next(3)) | ||||||
end do | ||||||
Comment on lines
+84
to
+108
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this not handled by mesh initialisation? i.e. is this the appropriate place to do this, or should it be done by a |
||||||
|
||||||
end subroutine decomposition_2decompfft | ||||||
|
||||||
|
||||||
end submodule |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,16 @@ set(SRC | |
solver.f90 | ||
tdsops.f90 | ||
time_integrator.f90 | ||
ordering.f90 | ||
mesh.f90 | ||
mesh_content.f90 | ||
decomp.f90 | ||
field.f90 | ||
vector_calculus.f90 | ||
omp/backend.f90 | ||
omp/common.f90 | ||
omp/kernels/distributed.f90 | ||
omp/poisson_fft.f90 | ||
omp/kernels/spectral_processing.f90 | ||
omp/sendrecv.f90 | ||
omp/exec_dist.f90 | ||
) | ||
|
@@ -31,21 +36,31 @@ set(CUDASRC | |
cuda/sendrecv.f90 | ||
cuda/tdsops.f90 | ||
) | ||
set(2DECOMPFFTSRC | ||
2decompfft/omp/poisson_fft.f90 | ||
2decompfft/decomp.f90 | ||
) | ||
set(GENERICDECOMPSRC | ||
decomp_generic.f90 | ||
) | ||
|
||
if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR | ||
${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") | ||
if(${BACKEND} STREQUAL "CUDA") | ||
list(APPEND SRC ${CUDASRC}) | ||
endif() | ||
|
||
if (${POISSON_SOLVER} STREQUAL "FFT" AND ${BACKEND} STREQUAL "OMP") | ||
list(APPEND SRC ${2DECOMPFFTSRC}) | ||
else() | ||
list(APPEND SRC ${GENERICDECOMPSRC}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generic does the decomposition fine, and should be used when using CUDA (or ITER). It may not be consistent with what 2decomp&fft does internally hence the need to have a separate one for it calling 2decomp directly. |
||
endif() | ||
|
||
add_library(x3d2 STATIC ${SRC}) | ||
target_include_directories(x3d2 INTERFACE ${CMAKE_CURRENT_BINARY_DIR}) | ||
|
||
add_executable(xcompact xcompact.f90) | ||
target_link_libraries(xcompact PRIVATE x3d2) | ||
|
||
if(${CMAKE_Fortran_COMPILER_ID} STREQUAL "PGI" OR | ||
${CMAKE_Fortran_COMPILER_ID} STREQUAL "NVHPC") | ||
|
||
if(${BACKEND} STREQUAL "CUDA") | ||
set(CMAKE_Fortran_FLAGS "-cpp -cuda") | ||
set(CMAKE_Fortran_FLAGS_DEBUG "-g -O0 -traceback -Mbounds -Mchkptr -Ktrap=fp") | ||
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -fast") | ||
|
@@ -59,11 +74,19 @@ elseif(${CMAKE_Fortran_COMPILER_ID} STREQUAL "GNU") | |
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -ffast-math") | ||
endif() | ||
|
||
# FFT Poisson solver for the OMP backend, provided by 2decomp&fft.
# The variable expansions are quoted so that an unset POISSON_SOLVER or
# BACKEND compares as an empty string instead of producing a malformed if().
if("${POISSON_SOLVER}" STREQUAL "FFT" AND "${BACKEND}" STREQUAL "OMP")
  message(STATUS "Using the FFT poisson solver with 2decomp&fft")
  # Let find_package() see the find modules shipped in <source>/cmake.
  list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
  find_package(decomp2d REQUIRED)
  include_directories(${decomp2d_INCLUDE_DIRS})
  # The former item-less call target_link_libraries(decomp2d) linked nothing
  # and has been removed; only x3d2 needs to link against the library.
  target_link_libraries(x3d2 PRIVATE decomp2d)
endif()
|
||
find_package(OpenMP REQUIRED) | ||
target_link_libraries(x3d2 PRIVATE OpenMP::OpenMP_Fortran) | ||
target_link_libraries(xcompact PRIVATE OpenMP::OpenMP_Fortran) | ||
|
||
find_package(MPI REQUIRED) | ||
target_link_libraries(x3d2 PRIVATE MPI::MPI_Fortran) | ||
target_link_libraries(xcompact PRIVATE MPI::MPI_Fortran) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,7 +64,7 @@ module m_allocator | |
contains | ||
|
||
function allocator_init(mesh, sz) result(allocator) | ||
type(mesh_t), target, intent(inout) :: mesh | ||
class(mesh_t), target, intent(inout) :: mesh | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have mesh subtypes now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No indeed, that was a remnant of a previous implementation |
||
integer, intent(in) :: sz | ||
type(allocator_t) :: allocator | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
module m_decomp | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Module could use some comments |
||
use m_mesh_content, only: par_t, grid_t | ||
implicit none | ||
|
||
! Object implementing the actual decomposition | ||
type, abstract :: decomp_t | ||
contains | ||
procedure(decomposition), public, deferred :: decomposition | ||
end type decomp_t | ||
|
||
! Abstraction layer to allow different version of the decomposition computation | ||
type, public :: decomp_mod_t | ||
class(decomp_t), allocatable :: decomp | ||
contains | ||
procedure, public :: decomp_grid | ||
end type | ||
|
||
interface decomp_mod_t | ||
module procedure init_decomp_mod | ||
end interface | ||
|
||
interface | ||
module subroutine init_decomp(decomp) | ||
class(decomp_t), allocatable, intent(out) :: decomp | ||
end subroutine | ||
|
||
module subroutine decomposition(self, grid, par) | ||
use m_mesh_content, only: par_t, grid_t | ||
class(decomp_t) :: self | ||
class(grid_t), intent(inout) :: grid | ||
class(par_t), intent(inout) :: par | ||
end subroutine | ||
|
||
end interface | ||
|
||
contains | ||
|
||
function init_decomp_mod() result(decomp)
  !! Constructor reachable through the generic name `decomp_mod_t`.
  !! Delegates allocation of the wrapped `decomp` component to
  !! `init_decomp`, whose implementation is supplied by a submodule.
  type(decomp_mod_t) :: decomp

  call init_decomp(decomp%decomp)
end function init_decomp_mod
|
||
subroutine decomp_grid(self, grid, par)
  !! Decompose the grid by forwarding to the `decomposition` binding of
  !! the decomposition object stored in `self%decomp`.
  !!
  !! This is an ordinary module procedure: it is defined here and has no
  !! module procedure interface body, so it must not carry the `module`
  !! prefix reserved for separate module procedures.
  class(decomp_mod_t) :: self
  class(grid_t), intent(inout) :: grid
  class(par_t), intent(inout) :: par

  call self%decomp%decomposition(grid, par)
end subroutine decomp_grid
|
||
|
||
end module m_decomp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove commented code