diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt
index 66c656893..639b74a32 100644
--- a/core/src/CMakeLists.txt
+++ b/core/src/CMakeLists.txt
@@ -32,6 +32,12 @@ if(Cabana_ENABLE_MPI)
     )
 endif()
 
+if(Cabana_ENABLE_CAJITA)
+  list(APPEND HEADERS_PUBLIC
+    Cabana_ParticleGridCommunication.hpp
+    )
+endif()
+
 set(HEADERS_IMPL
   impl/Cabana_CartesianGrid.hpp
   impl/Cabana_Index.hpp
@@ -54,6 +60,10 @@ if(Cabana_ENABLE_MPI)
   target_link_libraries(cabanacore MPI::MPI_CXX)
 endif()
 
+if(Cabana_ENABLE_CAJITA)
+  target_link_libraries(cabanacore Cajita)
+endif()
+
 target_include_directories(cabanacore
   PUBLIC
   $
diff --git a/core/src/Cabana_CommunicationPlan.hpp b/core/src/Cabana_CommunicationPlan.hpp
index bdb8dac3e..05c62d138 100644
--- a/core/src/Cabana_CommunicationPlan.hpp
+++ b/core/src/Cabana_CommunicationPlan.hpp
@@ -30,6 +30,7 @@ namespace Cabana
 {
 namespace Impl
 {
+
 //---------------------------------------------------------------------------//
 // Count sends and create steering algorithm tags.
 struct CountSendsAndCreateSteeringDuplicated
@@ -246,6 +247,29 @@ auto countSendsAndCreateSteering( const ExportRankView element_export_ranks,
     return std::make_pair( neighbor_counts, neighbor_ids );
 }
 
+//---------------------------------------------------------------------------//
+// Return unique neighbor ranks, with the current rank first.
+std::vector<int> getUniqueTopology( std::vector<int> topology )
+{
+    auto remove_end = std::remove( topology.begin(), topology.end(), -1 );
+    std::sort( topology.begin(), remove_end );
+    auto unique_end = std::unique( topology.begin(), remove_end );
+    topology.resize( std::distance( topology.begin(), unique_end ) );
+
+    // Put this rank first.
+    int my_rank = -1;
+    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
+    for ( auto& n : topology )
+    {
+        if ( n == my_rank )
+        {
+            std::swap( n, topology[0] );
+            break;
+        }
+    }
+    return topology;
+}
+
 //---------------------------------------------------------------------------//
 
 } // end namespace Impl
@@ -464,11 +488,8 @@ class CommunicationPlan
         // Store the number of export elements.
         _num_export_element = element_export_ranks.size();
 
-        // Store the unique neighbors.
-        _neighbors = neighbor_ranks;
-        std::sort( _neighbors.begin(), _neighbors.end() );
-        auto unique_end = std::unique( _neighbors.begin(), _neighbors.end() );
-        _neighbors.resize( std::distance( _neighbors.begin(), unique_end ) );
+        // Store the unique neighbors (this rank first).
+        _neighbors = Impl::getUniqueTopology( neighbor_ranks );
         int num_n = _neighbors.size();
 
         // Get the size of this communicator.
@@ -483,15 +504,6 @@ class CommunicationPlan
         // communication space so any mpi tag will do.
         const int mpi_tag = 1221;
 
-        // If we are sending to ourself put that one first in the neighbor
-        // list.
-        for ( auto& n : _neighbors )
-            if ( n == my_rank )
-            {
-                std::swap( n, _neighbors[0] );
-                break;
-            }
-
         // Initialize import/export sizes.
_num_export.assign( num_n, 0 ); _num_import.assign( num_n, 0 ); diff --git a/core/src/Cabana_Core.hpp b/core/src/Cabana_Core.hpp index 3f72e795d..67737f3a3 100644 --- a/core/src/Cabana_Core.hpp +++ b/core/src/Cabana_Core.hpp @@ -33,6 +33,10 @@ #include #endif +#ifdef Cabana_ENABLE_CAJITA +#include +#endif + #ifdef Cabana_ENABLE_ARBORX #include #endif diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp index d477db04c..d18c97a61 100644 --- a/core/src/Cabana_Halo.hpp +++ b/core/src/Cabana_Halo.hpp @@ -21,6 +21,7 @@ #include #include +#include #include namespace Cabana @@ -199,60 +200,65 @@ struct is_halo : public is_halo_impl::type>::type { }; -//---------------------------------------------------------------------------// -/*! - \brief Synchronously gather data from the local decomposition to the ghosts - using the halo forward communication plan. AoSoA version. This is a - uniquely-owned to multiply-owned communication. - - A gather sends data from a locally owned elements to one or many ranks on - which they exist as ghosts. A locally owned element may be sent to as many - ranks as desired to be used as a ghost on those ranks. The value of the - element in the locally owned decomposition will be the value assigned to the - element in the ghosted decomposition. - - \tparam Halo_t Halo type - must be a Halo. - - \tparam AoSoA_t AoSoA type - must be an AoSoA. +namespace Impl +{ - \param halo The halo to use for the gather. +//---------------------------------------------------------------------------// +// Check that the AoSoA/slice is the right size. +template +void checkSize( const Halo_t& halo, Container_t& container ) +{ + if ( container.size() != halo.numLocal() + halo.numGhost() ) + throw std::runtime_error( "AoSoA/slice is the wrong size for gather!" ); +} - \param aosoa The AoSoA on which to perform the gather. The AoSoA should have - a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned - elements are expected to appear first (i.e. in the first halo.numLocal() - elements) and the ghosted elements are expected to appear second (i.e. in - the next halo.numGhost() elements()). -*/ -template -void gather( const Halo_t& halo, AoSoA_t& aosoa, - typename std::enable_if<( is_halo::value && - is_aosoa::value ), - int>::type* = 0 ) +//---------------------------------------------------------------------------// +// Fill the data to be sent. AoSoA variant. +template +void fillSends( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer ) { - // Check that the AoSoA is the right size. - if ( aosoa.size() != halo.numLocal() + halo.numGhost() ) - throw std::runtime_error( "AoSoA is the wrong size for gather!" ); + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); - // Allocate a send buffer. - Kokkos::View - send_buffer( - Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), - halo.totalNumExport() ); + // Gather from the local data into a tuple-contiguous send buffer. + auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + send_buffer( i ) = aosoa.getTuple( steering( i ) ); + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, gather_send_buffer_func ); + Kokkos::fence(); +} +// Fill the data to be sent with modifications. AoSoA variant. 
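+// Identical to the variant above, except that a user-provided functor can
+// adjust each packed tuple (e.g. apply a periodic coordinate shift) before
+// the buffer is communicated.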
+template +void fillSends( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer, + const Modify_t& modify_functor ) +{ // Get the steering vector for the sends. auto steering = halo.getExportSteering(); // Gather from the local data into a tuple-contiguous send buffer. + // Pass send buffer to user modification functor class to add shifts. auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) { send_buffer( i ) = aosoa.getTuple( steering( i ) ); + modify_functor( send_buffer, i ); }; Kokkos::RangePolicy gather_send_buffer_policy( 0, halo.totalNumExport() ); Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", gather_send_buffer_policy, gather_send_buffer_func ); Kokkos::fence(); +} +// Extract the received data. AoSoA variant. +template +void sendReceiveExtract( const Halo_t& halo, AoSoA_t& aosoa, + const View_t& send_buffer ) +{ // Allocate a receive buffer. Kokkos::View recv_buffer( @@ -321,55 +327,42 @@ void gather( const Halo_t& halo, AoSoA_t& aosoa, } //---------------------------------------------------------------------------// -/*! - \brief Synchronously gather data from the local decomposition to the ghosts - using the halo forward communication plan. Slice version. This is a - uniquely-owned to multiply-owned communication. - - A gather sends data from a locally owned elements to one or many ranks on - which they exist as ghosts. A locally owned element may be sent to as many - ranks as desired to be used as a ghost on those ranks. The value of the - element in the locally owned decomposition will be the value assigned to the - element in the ghosted decomposition. - - \tparam Halo_t Halo type - must be a Halo. +// Fill the data to be sent. Slice variant. +template +void fillSends( const Halo_t& halo, Slice_t& slice, View_t& send_buffer, + std::size_t num_comp ) +{ + // Get the raw slice data. + auto slice_data = slice.data(); - \tparam Slice_t Slice type - must be a Slice. + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); - \param halo The halo to use for the gather. + // Gather from the local data into a tuple-contiguous send buffer. + auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( steering( i ) ); + auto a = Slice_t::index_type::a( steering( i ) ); + std::size_t slice_offset = s * slice.stride( 0 ) + a; + for ( std::size_t n = 0; n < num_comp; ++n ) + send_buffer( i, n ) = + slice_data[slice_offset + n * Slice_t::vector_length]; + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, gather_send_buffer_func ); + Kokkos::fence(); +} - \param slice The Slice on which to perform the gather. The Slice should have - a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned - elements are expected to appear first (i.e. in the first halo.numLocal() - elements) and the ghosted elements are expected to appear second (i.e. in - the next halo.numGhost() elements()). -*/ -template -void gather( const Halo_t& halo, Slice_t& slice, - typename std::enable_if<( is_halo::value && - is_slice::value ), - int>::type* = 0 ) +// Fill the data to be sent with modifications. Slice variant. +template +void fillSends( const Halo_t& halo, Slice_t& slice, View_t& send_buffer, + std::size_t num_comp, const Modify_t& modify_functor ) { - // Check that the Slice is the right size. 
- if ( slice.size() != halo.numLocal() + halo.numGhost() ) - throw std::runtime_error( "Slice is the wrong size for gather!" ); - - // Get the number of components in the slice. - std::size_t num_comp = 1; - for ( std::size_t d = 2; d < slice.rank(); ++d ) - num_comp *= slice.extent( d ); - // Get the raw slice data. auto slice_data = slice.data(); - // Allocate a send buffer. Note this one is layout right so the components - // are consecutive. - Kokkos::View - send_buffer( - Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), - halo.totalNumExport(), num_comp ); - // Get the steering vector for the sends. auto steering = halo.getExportSteering(); @@ -380,22 +373,32 @@ void gather( const Halo_t& halo, Slice_t& slice, auto a = Slice_t::index_type::a( steering( i ) ); std::size_t slice_offset = s * slice.stride( 0 ) + a; for ( std::size_t n = 0; n < num_comp; ++n ) + { send_buffer( i, n ) = slice_data[slice_offset + n * Slice_t::vector_length]; + modify_functor( send_buffer, i, n ); + } }; Kokkos::RangePolicy gather_send_buffer_policy( 0, halo.totalNumExport() ); Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", gather_send_buffer_policy, gather_send_buffer_func ); Kokkos::fence(); +} - // Allocate a receive buffer. Note this one is layout right so the - // components are consecutive. +template +void sendReceiveExtract( const Halo_t& halo, Slice_t& slice, + const View_t& send_buffer, std::size_t num_comp ) +{ + // Allocate a receive buffer. Kokkos::View recv_buffer( Kokkos::ViewAllocateWithoutInitializing( "halo_recv_buffer" ), - halo.totalNumImport(), num_comp ); + halo.totalNumExport(), num_comp ); + + // Get the raw slice data. + auto slice_data = slice.data(); // The halo has it's own communication space so choose any mpi tag. const int mpi_tag = 2345; @@ -465,6 +468,204 @@ void gather( const Halo_t& halo, Slice_t& slice, MPI_Barrier( halo.comm() ); } +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. AoSoA version, where the buffer is + modified before being sent. This is a uniquely-owned to multiply-owned + communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \tparam Modify_t Buffer modification type. + + \param halo The halo to use for the gather. + + \param aosoa The AoSoA on which to perform the gather. The AoSoA should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). + + \param modify_functor Class containing functor to modify the send buffer + before being sent (e.g. for periodic coordinate update). +*/ +template +void gather( const Halo_t& halo, AoSoA_t& aosoa, const Modify_t& modify_functor, + typename std::enable_if<( is_halo::value && + is_aosoa::value ), + int>::type* = 0 ) +{ + Impl::checkSize( halo, aosoa ); + + // Allocate a send buffer. 
+ Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport() ); + + Impl::fillSends( halo, aosoa, send_buffer, modify_functor ); + Impl::sendReceiveExtract( halo, aosoa, send_buffer ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. AoSoA version. This is a + uniquely-owned to multiply-owned communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \param halo The halo to use for the gather. + + \param aosoa The AoSoA on which to perform the gather. The AoSoA should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). +*/ +template +void gather( const Halo_t& halo, AoSoA_t& aosoa, + typename std::enable_if<( is_halo::value && + is_aosoa::value ), + int>::type* = 0 ) +{ + Impl::checkSize( halo, aosoa ); + + // Allocate a send buffer. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport() ); + + Impl::fillSends( halo, aosoa, send_buffer ); + Impl::sendReceiveExtract( halo, aosoa, send_buffer ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. Slice version. This is a + uniquely-owned to multiply-owned communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam Slice_t Slice type - must be a slice. + + \param halo The halo to use for the gather. + + \param slice The slice on which to perform the gather. The slice should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). +*/ +template +void gather( const Halo_t& halo, Slice_t& slice, + typename std::enable_if<( is_halo::value && + is_slice::value ), + int>::type* = 0 ) +{ + // Check that the slice is the right size. + Impl::checkSize( halo, slice ); + + // Get the number of components in the slice. + std::size_t num_comp = 1; + for ( std::size_t d = 2; d < slice.rank(); ++d ) + num_comp *= slice.extent( d ); + + // Allocate a send buffer. Note this one is layout right so the components + // are consecutive. 
+ Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport(), num_comp ); + + Impl::fillSends( halo, slice, send_buffer, num_comp ); + Impl::sendReceiveExtract( halo, slice, send_buffer, num_comp ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. Slice version, where the buffer is + modified before being sent. This is a uniquely-owned to multiply-owned + communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam Slice_t Slice type - must be a slice. + + \tparam Modify_t Buffer modification type. + + \param halo The halo to use for the gather. + + \param slice The slice on which to perform the gather. The slice should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). + + \param modify_functor Class containing functor to modify the send buffer + before being sent (e.g. for periodic coordinate update). +*/ +template +void gather( const Halo_t& halo, Slice_t& slice, const Modify_t& modify_functor, + typename std::enable_if<( is_halo::value && + is_slice::value ), + int>::type* = 0 ) +{ + // Check that the slice is the right size. + Impl::checkSize( halo, slice ); + + // Get the number of components in the slice. + std::size_t num_comp = 1; + for ( std::size_t d = 2; d < slice.rank(); ++d ) + num_comp *= slice.extent( d ); + + // Allocate a send buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport(), num_comp ); + + Impl::fillSends( halo, slice, send_buffer, num_comp, modify_functor ); + Impl::sendReceiveExtract( halo, slice, send_buffer, num_comp ); +} + //---------------------------------------------------------------------------// /*! \brief Synchronously scatter data from the ghosts to the local decomposition @@ -617,6 +818,6 @@ void scatter( const Halo_t& halo, Slice_t& slice, //---------------------------------------------------------------------------// -} // end namespace Cabana +} // namespace Cabana #endif // end CABANA_HALO_HPP diff --git a/core/src/Cabana_ParticleGridCommunication.hpp b/core/src/Cabana_ParticleGridCommunication.hpp new file mode 100644 index 000000000..fe1846232 --- /dev/null +++ b/core/src/Cabana_ParticleGridCommunication.hpp @@ -0,0 +1,919 @@ +/**************************************************************************** + * Copyright (c) 2018-2020 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef CABANA_GRIDCOMM_HPP +#define CABANA_GRIDCOMM_HPP + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +namespace Cabana +{ + +namespace Impl +{ + +//---------------------------------------------------------------------------// +// Of the 27 potential local grids figure out which are in our topology. +// Some of the ranks in this list may be invalid. This needs to be updated +// after computing destination ranks to only contain valid ranks. +template +auto getTopology( const LocalGridType& local_grid ) +{ + std::vector topology( 27, -1 ); + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + topology[nr] = local_grid.neighborRank( i, j, k ); + return topology; +} + +} // namespace Impl + +//---------------------------------------------------------------------------// +// Grid Distributor/migrate +//---------------------------------------------------------------------------// + +//---------------------------------------------------------------------------// +/*! + \brief Check for the number of particles that must be communicated + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionSliceType Particle position type. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param positions The particle position container, either Slice or View. + + \param minimum_halo_width Number of halo mesh widths to include for ghosting. +*/ +template +int migrateCount( const LocalGridType& local_grid, + const PositionSliceType& positions, + const int minimum_halo_width ) +{ + using execution_space = typename PositionSliceType::execution_space; + + // Check within the halo width, within the ghosted domain. 
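+    // A particle triggers migration if it lies within minimum_halo_width
+    // cells of the ghosted-domain boundary (or beyond it) in any dimension;
+    // the per-rank counts are then summed across the communicator below.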
+ const auto& local_mesh = + Cajita::createLocalMesh( local_grid ); + auto dx = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + const Kokkos::Array local_low = { + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::I ) + + minimum_halo_width * dx, + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::J ) + + minimum_halo_width * dy, + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::K ) + + minimum_halo_width * dz }; + const Kokkos::Array local_high = { + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::I ) - + minimum_halo_width * dx, + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::J ) - + minimum_halo_width * dy, + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::K ) - + minimum_halo_width * dz }; + int comm_count = 0; + Kokkos::parallel_reduce( + "redistribute_count", + Kokkos::RangePolicy( 0, positions.size() ), + KOKKOS_LAMBDA( const int p, int& result ) { + if ( positions( p, Cajita::Dim::I ) < local_low[Cajita::Dim::I] || + positions( p, Cajita::Dim::I ) > local_high[Cajita::Dim::I] || + positions( p, Cajita::Dim::J ) < local_low[Cajita::Dim::J] || + positions( p, Cajita::Dim::J ) > local_high[Cajita::Dim::J] || + positions( p, Cajita::Dim::K ) < local_low[Cajita::Dim::K] || + positions( p, Cajita::Dim::K ) > local_high[Cajita::Dim::K] ) + result += 1; + }, + comm_count ); + + MPI_Allreduce( MPI_IN_PLACE, &comm_count, 1, MPI_INT, MPI_SUM, + local_grid.globalGrid().comm() ); + + return comm_count; +} + +namespace Impl +{ +//---------------------------------------------------------------------------// +// Locate the particles in the local grid and get their destination rank. +// Particles are assumed to only migrate to a location in the 26 neighbor halo +// or stay on this rank. If the particle crosses a global periodic boundary, +// wrap it's coordinates back into the domain. +template +void getMigrateDestinations( const LocalGridType& local_grid, + const NeighborRankView& neighbor_ranks, + DestinationRankView& destinations, + const PositionSliceType& positions ) +{ + using execution_space = typename PositionSliceType::execution_space; + + const auto& local_mesh = + Cajita::createLocalMesh( local_grid ); + + // Check within the local domain. + const Kokkos::Array local_low = { + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::I ), + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::J ), + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::K ) }; + const Kokkos::Array local_high = { + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::I ), + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::J ), + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::K ) }; + + // Use global domain for periodicity. 
+ const auto& global_grid = local_grid.globalGrid(); + const auto& global_mesh = global_grid.globalMesh(); + const Kokkos::Array periodic = { + global_grid.isPeriodic( Cajita::Dim::I ), + global_grid.isPeriodic( Cajita::Dim::J ), + global_grid.isPeriodic( Cajita::Dim::K ) }; + const Kokkos::Array global_low = { + global_mesh.lowCorner( Cajita::Dim::I ), + global_mesh.lowCorner( Cajita::Dim::J ), + global_mesh.lowCorner( Cajita::Dim::K ) }; + const Kokkos::Array global_high = { + global_mesh.highCorner( Cajita::Dim::I ), + global_mesh.highCorner( Cajita::Dim::J ), + global_mesh.highCorner( Cajita::Dim::K ) }; + const Kokkos::Array global_extent = { + global_mesh.extent( Cajita::Dim::I ), + global_mesh.extent( Cajita::Dim::J ), + global_mesh.extent( Cajita::Dim::K ) }; + + Kokkos::parallel_for( + "get_migrate_destinations", + Kokkos::RangePolicy( 0, positions.size() ), + KOKKOS_LAMBDA( const int p ) { + // Compute the logical index of the neighbor we are sending to. + int nid[3] = { 1, 1, 1 }; + for ( int d = 0; d < 3; ++d ) + { + if ( positions( p, d ) < local_low[d] ) + nid[d] = 0; + else if ( positions( p, d ) > local_high[d] ) + nid[d] = 2; + } + + // Compute the destination MPI rank. + destinations( p ) = neighbor_ranks( + nid[Cajita::Dim::I] + + 3 * ( nid[Cajita::Dim::J] + 3 * nid[Cajita::Dim::K] ) ); + + // Shift periodic coordinates if needed. + for ( int d = 0; d < 3; ++d ) + { + if ( periodic[d] ) + { + if ( positions( p, d ) > global_high[d] ) + positions( p, d ) -= global_extent[d]; + else if ( positions( p, d ) < global_low[d] ) + positions( p, d ) += global_extent[d]; + } + } + } ); +} + +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief Determine which data should be migrated from one + uniquely-owned decomposition to another uniquely-owned decomposition, using + bounds of a Cajita grid and taking periodic boundaries into account. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionContainer AoSoA type. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param positions The particle positions. + + \return Distributor for later migration. +*/ +template +Distributor +createGridDistributor( const LocalGridType& local_grid, + PositionSliceType& positions ) +{ + using device_type = typename PositionSliceType::device_type; + + // Get all 26 neighbor ranks. + auto topology = Impl::getTopology( local_grid ); + + Kokkos::View + topology_host( topology.data(), topology.size() ); + auto topology_mirror = + Kokkos::create_mirror_view_and_copy( device_type(), topology_host ); + Kokkos::View destinations( + Kokkos::ViewAllocateWithoutInitializing( "destinations" ), + positions.size() ); + + // Determine destination ranks for all particles and wrap positions across + // periodic boundaries. + Impl::getMigrateDestinations( local_grid, topology_mirror, destinations, + positions ); + + // Create the Cabana distributor. + Distributor distributor( local_grid.globalGrid().comm(), + destinations, topology ); + return distributor; +} + +//---------------------------------------------------------------------------// +/*! + \brief Migrate data from one uniquely-owned decomposition to another + uniquely-owned decomposition, using the bounds and periodic boundaries of a + Cajita grid to determine which particles should be moved. In-place variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionContainer AoSoA type. 
+ + \tparam PositionIndex Particle position index within the AoSoA. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param particles The particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param min_halo_width Number of halo mesh widths to allow particles before + migrating. + + \param force_migrate Migrate particles outside the local domain regardless of + ghosted halo. +*/ +template +void gridMigrate( const LocalGridType& local_grid, ParticleContainer& particles, + std::integral_constant, + const int min_halo_width, const bool force_migrate = false ) +{ + // Get the positions. + auto positions = slice( particles ); + + // When false, this option checks that any particles are nearly outside the + // ghosted halo region (outside the min_halo_width) before initiating + // migration. Otherwise, anything outside the local domain is migrated + // regardless of position in the halo. + if ( !force_migrate ) + { + // Check to see if we need to communicate. + auto comm_count = migrateCount( local_grid, positions, min_halo_width ); + + // If we have no particles near the ghosted boundary, then exit. + if ( 0 == comm_count ) + return; + } + + auto distributor = createGridDistributor( local_grid, positions ); + + // Redistribute the particles. + migrate( distributor, particles ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Migrate data from one uniquely-owned decomposition to another + uniquely-owned decomposition, using the bounds and periodic boundaries of a + Cajita grid to determine which particles should be moved. Separate AoSoA + variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam ParticleContainer AoSoA type. + + \tparam PositionIndex Particle position index within the AoSoA. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param src_particles The source particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param src_particles The destination particle AoSoA, containing positions. + + \param min_halo_width Number of halo mesh widths to allow particles before + migrating. + + \param force_migrate Migrate particles outside the local domain regardless of + ghosted halo. +*/ +template +void gridMigrate( const LocalGridType& local_grid, + ParticleContainer& src_particles, + std::integral_constant, + ParticleContainer& dst_particles, const int min_halo_width, + const bool force_migrate = false ) +{ + // Get the positions. + auto positions = slice( src_particles ); + + // When false, this option checks that any particles are nearly outside the + // ghosted halo region (outside the min_halo_width) before initiating + // migration. Otherwise, anything outside the local domain is migrated + // regardless of position in the halo. + if ( !force_migrate ) + { + // Check to see if we need to communicate. + auto comm_count = migrateCount( local_grid, positions, min_halo_width ); + + // If we have no particles near the ghosted boundary, copy, then exit. + if ( 0 == comm_count ) + { + Cabana::deep_copy( dst_particles, src_particles ); + return; + } + } + + auto distributor = createGridDistributor( local_grid, positions ); + + // Resize as needed. + dst_particles.resize( distributor.totalNumImport() ); + + // Redistribute the particles. 
+ migrate( distributor, src_particles, dst_particles ); +} + +//---------------------------------------------------------------------------// +// Grid Halo/gather +//---------------------------------------------------------------------------// + +namespace Impl +{ + +// Functor to determine which particles should be ghosted with Cajita grid. +template +struct HaloIds +{ + Kokkos::Array _periodic; + Kokkos::Array _global_low; + Kokkos::Array _global_high; + Kokkos::Array _global_extent; + + int _min_halo; + int _neighbor_rank; + + CountView _send_count; + DestinationRankView _destinations; + DestinationRankView _ids; + ShiftViewType _shifts; + PositionSliceType _positions; + + Kokkos::Array _ijk; + Kokkos::Array _min_coord; + Kokkos::Array _max_coord; + + HaloIds( const LocalGridType& local_grid, + const PositionSliceType& positions, CountView& send_count, + DestinationRankView& destinations, DestinationRankView& ids, + ShiftViewType& shifts, const int minimum_halo_width ) + { + _send_count = send_count; + _destinations = destinations; + _ids = ids; + _shifts = shifts; + _positions = positions; + + // Check within the halo width, within the local domain. + const auto& global_grid = local_grid.globalGrid(); + _periodic = { global_grid.isPeriodic( Cajita::Dim::I ), + global_grid.isPeriodic( Cajita::Dim::J ), + global_grid.isPeriodic( Cajita::Dim::K ) }; + auto dx = + local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = + local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = + local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + const auto& global_mesh = global_grid.globalMesh(); + _global_low = { + global_mesh.lowCorner( Cajita::Dim::I ) + minimum_halo_width * dx, + global_mesh.lowCorner( Cajita::Dim::J ) + minimum_halo_width * dy, + global_mesh.lowCorner( Cajita::Dim::K ) + minimum_halo_width * dz }; + _global_high = { + global_mesh.highCorner( Cajita::Dim::I ) - minimum_halo_width * dx, + global_mesh.highCorner( Cajita::Dim::J ) - minimum_halo_width * dy, + global_mesh.highCorner( Cajita::Dim::K ) - + minimum_halo_width * dz }; + _global_extent = { global_mesh.extent( Cajita::Dim::I ), + global_mesh.extent( Cajita::Dim::J ), + global_mesh.extent( Cajita::Dim::K ) }; + + _min_halo = minimum_halo_width; + + build( local_grid ); + } + + KOKKOS_INLINE_FUNCTION void operator()( const int p ) const + { + Kokkos::Array pos = { _positions( p, Cajita::Dim::I ), + _positions( p, Cajita::Dim::J ), + _positions( p, Cajita::Dim::K ) }; + + // Check the if particle is both in the owned space + // and the ghosted space of this neighbor (ignore + // the current cell). + if ( ( pos[Cajita::Dim::I] > _min_coord[Cajita::Dim::I] && + pos[Cajita::Dim::I] < _max_coord[Cajita::Dim::I] ) && + ( pos[Cajita::Dim::J] > _min_coord[Cajita::Dim::J] && + pos[Cajita::Dim::J] < _max_coord[Cajita::Dim::J] ) && + ( pos[Cajita::Dim::K] > _min_coord[Cajita::Dim::K] && + pos[Cajita::Dim::K] < _max_coord[Cajita::Dim::K] ) ) + { + const std::size_t sc = _send_count()++; + // If the size of the arrays is exceeded, keep + // counting to resize and fill next. + if ( sc < _destinations.extent( 0 ) ) + { + // Keep the destination MPI rank. + _destinations( sc ) = _neighbor_rank; + // Keep the particle ID. + _ids( sc ) = p; + // Determine if this ghost particle needs to + // be shifted through the periodic boundary. 
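+                    // The shift (plus or minus the global extent) is only
+                    // stored here; it is applied to the send buffer during
+                    // the gather, so the owned particle keeps its original
+                    // coordinates.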
+ for ( int d = 0; d < 3; ++d ) + { + _shifts( sc, d ) = 0.0; + if ( _periodic[d] && _ijk[d] ) + { + if ( pos[d] > _global_high[d] ) + _shifts( sc, d ) = -_global_extent[d]; + else if ( pos[d] < _global_low[d] ) + _shifts( sc, d ) = _global_extent[d]; + } + } + } + } + } + + //---------------------------------------------------------------------------// + // Locate particles within the local grid and determine if any from this + // rank need to be ghosted to one (or more) of the 26 neighbor ranks, + // keeping track of destination rank, index in the container, and periodic + // shift needed (but not yet applied). + void build( const LocalGridType& local_grid ) + { + using execution_space = typename PositionSliceType::execution_space; + const auto& local_mesh = + Cajita::createLocalMesh( local_grid ); + + auto policy = + Kokkos::RangePolicy( 0, _positions.size() ); + + // Add a ghost if this particle is near the local boundary, potentially + // for each of the 26 neighbors cells. Do this one neighbor rank at a + // time so that sends are contiguous. + auto topology = getTopology( local_grid ); + auto unique_topology = getUniqueTopology( topology ); + for ( std::size_t ar = 0; ar < unique_topology.size(); ar++ ) + { + int nr = 0; + for ( int k = -1; k < 2; ++k ) + { + for ( int j = -1; j < 2; ++j ) + { + for ( int i = -1; i < 2; ++i, ++nr ) + { + if ( i != 0 || j != 0 || k != 0 ) + { + const int _neighbor_rank = topology[nr]; + if ( _neighbor_rank == unique_topology[ar] ) + { + auto sis = local_grid.sharedIndexSpace( + Cajita::Own(), Cajita::Cell(), i, j, k, + _min_halo ); + const int min_ind_i = sis.min( Cajita::Dim::I ); + const int min_ind_j = sis.min( Cajita::Dim::J ); + const int min_ind_k = sis.min( Cajita::Dim::K ); + Kokkos::Array min_ind = { + min_ind_i, min_ind_j, min_ind_k }; + const int max_ind_i = + sis.max( Cajita::Dim::I ) + 1; + const int max_ind_j = + sis.max( Cajita::Dim::J ) + 1; + const int max_ind_k = + sis.max( Cajita::Dim::K ) + 1; + Kokkos::Array max_ind = { + max_ind_i, max_ind_j, max_ind_k }; + + local_mesh.coordinates( Cajita::Node(), + min_ind.data(), + _min_coord.data() ); + local_mesh.coordinates( Cajita::Node(), + max_ind.data(), + _max_coord.data() ); + _ijk = { i, j, k }; + + Kokkos::parallel_for( "get_halo_ids", policy, + *this ); + Kokkos::fence(); + } + } + } + } + } + // Shift periodic coordinates in send buffers. + } + } + + void rebuild( const LocalGridType& local_grid ) + { + // Resize views to actual send sizes. + int dest_size = _destinations.extent( 0 ); + int dest_count = 0; + Kokkos::deep_copy( dest_count, _send_count ); + if ( dest_count != dest_size ) + { + Kokkos::resize( _destinations, dest_count ); + Kokkos::resize( _ids, dest_count ); + Kokkos::resize( _shifts, dest_count, 3 ); + } + + // If original view sizes were exceeded, only counting was done so + // we need to rerun. + if ( dest_count > dest_size ) + { + Kokkos::deep_copy( _send_count, 0 ); + build( local_grid ); + } + } +}; + +//---------------------------------------------------------------------------// +// Determine which particles should be ghosted, reallocating and recounting if +// needed. +template +auto getHaloIDs( + const LocalGridType& local_grid, const PositionSliceType& positions, + const int min_halo_width, const int max_export_guess, + typename std::enable_if::value, int>::type* = + 0 ) +{ + using device_type = typename PositionSliceType::device_type; + using pos_value = typename PositionSliceType::value_type; + + // Get all 26 neighbor ranks. 
+ auto topology = Impl::getTopology( local_grid ); + + using DestinationRankView = typename Kokkos::View; + using ShiftViewType = typename Kokkos::View; + using CountView = + typename Kokkos::View>; + DestinationRankView destinations( + Kokkos::ViewAllocateWithoutInitializing( "destinations" ), + max_export_guess ); + DestinationRankView ids( Kokkos::ViewAllocateWithoutInitializing( "ids" ), + max_export_guess ); + ShiftViewType shifts( Kokkos::ViewAllocateWithoutInitializing( "shifts" ), + max_export_guess, 3 ); + CountView send_count( "halo_send_count" ); + + // Determine which particles need to be ghosted to neighbors. + auto halo_ids = Impl::HaloIds( + local_grid, positions, send_count, destinations, ids, shifts, + min_halo_width ); + // Rebuild if needed. + halo_ids.rebuild( local_grid ); + + // Create the Cabana Halo. + auto halo = + Halo( local_grid.globalGrid().comm(), positions.size(), + ids, destinations, topology ); + return std::make_pair( halo, shifts ); +} + +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \class PeriodicShift + + \brief Store periodic shifts for halo communication. + + \tparam DeviceType Device type for which the data for this class will be + allocated and where parallel execution occurs. + + Ideally this would inherit from Halo (PeriodicHalo), combining the periodic + shift and halo together. This is not currently done because the + CommunicationPlan contains std member variables that would be captured on the + device (warnings with NVCC). +*/ +template +struct PeriodicShift +{ + Kokkos::View _shifts; + + /*! + \brief Constructor + + \tparam ShiftViewType The periodic shift Kokkos View type. + + \param shifts The periodic shifts for each element being sent. + */ + template + PeriodicShift( const ShiftViewType shifts ) + : _shifts( shifts ) + { + } +}; + +/*! + \class PeriodicModifyAoSoA + + \brief Modify AoSoA buffer with periodic shifts during gather. + + \tparam DeviceType Device type for which the data for this class will be + allocated and where parallel execution occurs. + + \tparam PositionIndex Particle position index within the AoSoA. +*/ +template +struct PeriodicModifyAoSoA : PeriodicShift +{ + using PeriodicShift::PeriodicShift; + using PeriodicShift::_shifts; + + std::size_t dim = _shifts.extent( 1 ); + + /*! + \brief Modify the send buffer with periodic shifts. + + \tparam ViewType The container type for the send buffer. + + \param send_buffer Send buffer of positions being ghosted. + + \param i Particle index. + */ + template + KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer, + const int i ) const + { + for ( std::size_t d = 0; d < dim; ++d ) + get( send_buffer( i ), d ) += _shifts( i, d ); + } +}; + +/*! + \class PeriodicModifySlice + + \brief Modify slice buffer with periodic shifts during gather. + + \tparam DeviceType Device type for which the data for this class will be + allocated and where parallel execution occurs. +*/ +template +struct PeriodicModifySlice : PeriodicShift +{ + using PeriodicShift::PeriodicShift; + using PeriodicShift::_shifts; + + /*! + \brief Modify the send buffer with periodic shifts. + + \tparam ViewType The container type for the send buffer. + + \param send_buffer Send buffer of positions being ghosted. + + \param i Particle index. + + \param d Dimension index. 
+ */ + template + KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer, const int i, + const int d ) const + { + send_buffer( i, d ) += _shifts( i, d ); + } +}; + +//---------------------------------------------------------------------------// +/*! + \class GridHalo + + \brief Store communication Halo and Modify functor. + + \tparam HaloType Halo type. + + \tparam ModifyType Modification functor type. +*/ +template +class GridHalo +{ + const HaloType _halo; + const ModifyType _modify; + + public: + /* + \brief Constructor + + \param halo Halo for gather/scatter. + + \param modify Store and apply buffer modifications. + */ + GridHalo( const HaloType& halo, const ModifyType& modify ) + : _halo( halo ) + , _modify( modify ) + { + } + + /* + \brief Return stored halo + + \return Halo for gather/scatter. + */ + HaloType getHalo() const { return _halo; } + /* + \brief Return stored modification functor. + + \return Modify object to access or apply buffer modifications. + */ + ModifyType getModify() const { return _modify; } +}; + +//---------------------------------------------------------------------------// +/*! + \brief Determine which data should be ghosted on another decomposition, using + bounds of a Cajita grid and taking periodic boundaries into account. AoSoA + variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam ParticleContainer AoSoA type. + \param local_grid The local grid for creating halo and periodicity. + + \param particles The particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param min_halo_width Number of halo mesh widths to include for ghosting. + + \param max_export_guess The allocation size for halo export ranks, IDs, and + periodic shifts. + + \return GridHalo containing Halo and PeriodicModify. +*/ +template +auto createGridHalo( + const LocalGridType& local_grid, const ParticleContainer& particles, + std::integral_constant, + const int min_halo_width, const int max_export_guess = 0, + typename std::enable_if::value, int>::type* = + 0 ) +{ + using device_type = typename ParticleContainer::device_type; + + auto positions = slice( particles ); + auto pair = Impl::getHaloIDs( local_grid, positions, min_halo_width, + max_export_guess ); + using halo_type = Halo; + halo_type halo = pair.first; + auto shifts = pair.second; + + // Create the functor for modifying the buffer. + using modify_type = PeriodicModifyAoSoA; + auto periodic_modify = modify_type( shifts ); + + // Return Halo and PeriodicModify together. + GridHalo grid_halo( halo, periodic_modify ); + return grid_halo; +} + +//---------------------------------------------------------------------------// +/*! + \brief Determine which data should be ghosted on another decomposition, using + bounds of a Cajita grid and taking periodic boundaries into account. Slice + variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionSliceType Slice type. + + \param local_grid The local grid for creating halo and periodicity. + + \param positions The position slice. + + \param min_halo_width Number of halo mesh widths to include for ghosting. + + \param max_export_guess The allocation size for halo export ranks, IDs, and + periodic shifts. + + \return GridHalo containing Halo and PeriodicModify. 
+*/ +template +auto createGridHalo( + const LocalGridType& local_grid, const PositionSliceType& positions, + const int min_halo_width, const int max_export_guess = 0, + typename std::enable_if::value, int>::type* = + 0 ) +{ + using device_type = typename PositionSliceType::device_type; + + auto pair = Impl::getHaloIDs( local_grid, positions, min_halo_width, + max_export_guess ); + using halo_type = Halo; + halo_type halo = pair.first; + auto shifts = pair.second; + + // Create the functor for modifying the buffer. + using modify_type = PeriodicModifySlice; + auto periodic_modify = modify_type( shifts ); + + // Return Halo and PeriodicModify together. + GridHalo grid_halo( halo, periodic_modify ); + return grid_halo; +} + +//---------------------------------------------------------------------------// +/*! + \brief Gather data from one decomposition and ghosts on another decomposition, + using the bounds and periodicity of a Cajita grid to determine which particles + should be copied. AoSoA variant. + + \tparam GridHaloType GridHalo type - contained ModifyType must have an + AoSoA-compatible functor to modify the buffer. + + \tparam ParticleContainer AoSoA type. + + \param grid_halo The communication halo taking into account periodic + boundaries. + + \param particles The particle AoSoA, containing positions. +*/ +template +void gridGather( const GridHaloType grid_halo, ParticleContainer& particles, + typename std::enable_if::value, + int>::type* = 0 ) +{ + auto halo = grid_halo.getHalo(); + auto modify = grid_halo.getModify(); + particles.resize( halo.numLocal() + halo.numGhost() ); + + gather( halo, particles, modify ); +} + +//---------------------------------------------------------------------------// +/*! + \brief Gather data from one decomposition and ghosts on another decomposition, + using the bounds and periodicity of a Cajita grid to determine which particles + should be copied. Slice variant. + + \tparam GridHaloType GridHalo type - contained ModifyType must have a + slice-compatible functor to modify the buffer. + + \tparam PositionSliceType Slice type. + + \param grid_halo The communication halo taking into account periodic + boundaries. + + \param positions The position slice. +*/ +template +void gridGather( const GridHaloType grid_halo, PositionSliceType& positions, + typename std::enable_if::value, + int>::type* = 0 ) +{ + auto halo = grid_halo.getHalo(); + auto modify = grid_halo.getModify(); + + // Must be resized to match local/ghost externally. + gather( halo, positions, modify ); +} + +} // namespace Cabana + +#endif // end CABANA_GRIDCOMM_HPP diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index e64cafe3b..54b7a38ab 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -96,4 +96,7 @@ if(Cabana_ENABLE_ARBORX) endif() if(Cabana_ENABLE_MPI) Cabana_add_tests(MPI NAMES CommunicationPlan Distributor Halo) + if(Cabana_ENABLE_CAJITA) + Cabana_add_tests(MPI NAMES ParticleGridCommunication) + endif() endif() diff --git a/core/unit_test/tstCommunicationPlan.hpp b/core/unit_test/tstCommunicationPlan.hpp index 3fe15431c..699ca6f3b 100644 --- a/core/unit_test/tstCommunicationPlan.hpp +++ b/core/unit_test/tstCommunicationPlan.hpp @@ -576,6 +576,29 @@ void test7( const bool use_topology ) EXPECT_EQ( n, host_steering( n ) ); } +//---------------------------------------------------------------------------// +void testTopology() +{ + // Get my rank. 
+ int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + + std::vector neighbor_ranks = { 0, 2, 2, 1, 1, 1, + 2, 3, 10, 27, my_rank }; + auto unique_ranks = Cabana::Impl::getUniqueTopology( neighbor_ranks ); + + // Check this rank is first. + EXPECT_EQ( my_rank, unique_ranks[0] ); + + // Check each rank is unique. + for ( std::size_t n = 0; n < unique_ranks.size(); ++n ) + for ( std::size_t m = 0; m < unique_ranks.size(); ++m ) + if ( n != m ) + { + EXPECT_NE( unique_ranks[n], unique_ranks[m] ); + } +} + //---------------------------------------------------------------------------// // RUN TESTS //---------------------------------------------------------------------------// @@ -607,6 +630,8 @@ TEST( TEST_CATEGORY, comm_plan_test_6_no_topo ) { test6( false ); } TEST( TEST_CATEGORY, comm_plan_test_7_no_topo ) { test7( false ); } +TEST( TEST_CATEGORY, comm_plan_test_topology ) { testTopology(); } + //---------------------------------------------------------------------------// } // end namespace Test diff --git a/core/unit_test/tstParticleGridCommunication.hpp b/core/unit_test/tstParticleGridCommunication.hpp new file mode 100644 index 000000000..7a9b632c6 --- /dev/null +++ b/core/unit_test/tstParticleGridCommunication.hpp @@ -0,0 +1,467 @@ +/**************************************************************************** + * Copyright (c) 2018-2020 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +// Shared test settings. +struct PGCommTestData +{ + using DataTypes = Cabana::MemberTypes; + Cabana::AoSoA data_host; + + std::shared_ptr>> local_grid; + + int num_data = 27; + + double lo_x, lo_y, lo_z, hi_x, hi_y, hi_z; + double ghost_x, ghost_y, ghost_z; + double global_lo_x, global_lo_y, global_lo_z; + double global_hi_x, global_hi_y, global_hi_z; + + PGCommTestData( bool ghost, int halo_width ) + { + // Create the MPI partitions. + Cajita::UniformDimPartitioner partitioner; + + // Create the global MPI decomposition mesh. + std::array lo_corner = { -3.2, -0.5, 2.2 }; + std::array hi_corner = { -0.2, 5.9, 4.2 }; + std::array num_cell = { 10, 10, 10 }; + auto global_mesh = + Cajita::createUniformGlobalMesh( lo_corner, hi_corner, num_cell ); + + // Create the global grid. + std::array is_periodic = { true, true, true }; + auto global_grid = Cajita::createGlobalGrid( + MPI_COMM_WORLD, global_mesh, is_periodic, partitioner ); + + // Create a grid local_grid with large enough halo for test loops below. + local_grid = Cajita::createLocalGrid( global_grid, 4 ); + auto local_mesh = Cajita::createLocalMesh( *local_grid ); + + // Make some data to migrate, one for each neighbor (and one in the + // center). + data_host = + Cabana::AoSoA( "host", num_data ); + auto pos_host = Cabana::slice<1>( data_host ); + + // Get mesh info. 
+ auto dx = + local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = + local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = + local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + double width_x, width_y, width_z; + // Create data near the boundary, either at the edge of the local domain + // or at the edge of the ghost region. + if ( ghost ) + { + lo_x = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::I ); + lo_y = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::J ); + lo_z = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::K ); + width_x = + local_mesh.extent( Cajita::Ghost(), Cajita::Dim::I ) / 2.0; + width_y = + local_mesh.extent( Cajita::Ghost(), Cajita::Dim::J ) / 2.0; + width_z = + local_mesh.extent( Cajita::Ghost(), Cajita::Dim::K ) / 2.0; + } + else + { + lo_x = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::I ); + lo_y = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::J ); + lo_z = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::K ); + width_x = local_mesh.extent( Cajita::Own(), Cajita::Dim::I ) / 2.0; + width_y = local_mesh.extent( Cajita::Own(), Cajita::Dim::J ) / 2.0; + width_z = local_mesh.extent( Cajita::Own(), Cajita::Dim::K ) / 2.0; + } + auto center_x = width_x + lo_x; + auto center_y = width_y + lo_y; + auto center_z = width_z + lo_z; + auto shift_x = width_x - ( halo_width - 0.1 ) * dx; + auto shift_y = width_y - ( halo_width - 0.1 ) * dy; + auto shift_z = width_z - ( halo_width - 0.1 ) * dz; + + // Fill the data. Add particles near the local domain in each direction + // and one in the center (that should never move). + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + { + pos_host( nr, 0 ) = center_x + i * shift_x; + pos_host( nr, 1 ) = center_y + j * shift_y; + pos_host( nr, 2 ) = center_z + k * shift_z; + } + + // Add a particle on rank zero to force some resizing for sends. 
+ int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + if ( my_rank == 0 ) + { + num_data++; + data_host.resize( num_data ); + auto pos = Cabana::slice<1>( data_host ); + pos_host( num_data - 1, 0 ) = center_x + shift_x; + pos_host( num_data - 1, 1 ) = center_y + shift_y; + pos_host( num_data - 1, 2 ) = center_z + shift_z; + } + + hi_x = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::I ); + hi_y = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::J ); + hi_z = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::K ); + + ghost_x = halo_width * dx; + ghost_y = halo_width * dy; + ghost_z = halo_width * dz; + + global_hi_x = global_mesh->highCorner( Cajita::Dim::I ); + global_hi_y = global_mesh->highCorner( Cajita::Dim::J ); + global_hi_z = global_mesh->highCorner( Cajita::Dim::K ); + global_lo_x = global_mesh->lowCorner( Cajita::Dim::I ); + global_lo_y = global_mesh->lowCorner( Cajita::Dim::J ); + global_lo_z = global_mesh->lowCorner( Cajita::Dim::K ); + } +}; + +//---------------------------------------------------------------------------// +void testMigrate( const int halo_width, const int test_halo_width, + const bool force_comm, const int test_type ) +{ + PGCommTestData test_data( true, halo_width ); + auto data_host = test_data.data_host; + auto local_grid = *( test_data.local_grid ); + int num_data = test_data.num_data; + + using DataTypes = Cabana::MemberTypes; + Cabana::AoSoA initial( "initial", num_data ); + Cabana::deep_copy( initial, data_host ); + auto pos_initial = Cabana::slice<1>( initial ); + + // Copy to the device. + Cabana::AoSoA data_src( "data_src", num_data ); + Cabana::deep_copy( data_src, data_host ); + + // Do the migration in-place. + if ( test_type == 0 ) + { + Cabana::gridMigrate( local_grid, data_src, + std::integral_constant(), + test_halo_width, force_comm ); + + data_host.resize( data_src.size() ); + Cabana::deep_copy( data_host, data_src ); + } + // Do the migration with a separate destination AoSoA. + else if ( test_type == 1 ) + { + Cabana::AoSoA data_dst( "data_dst", + num_data ); + Cabana::gridMigrate( local_grid, data_src, + std::integral_constant(), data_dst, + test_halo_width, force_comm ); + + data_host.resize( data_dst.size() ); + Cabana::deep_copy( data_host, data_dst ); + } + // Do the migration with separate slices (need to use createGridDistributor + // directly since slices can't be resized). + else if ( test_type == 2 ) + { + auto pos_src = Cabana::slice<1>( data_src ); + int comm_count = 0; + if ( !force_comm ) + { + // Check to see if we need to communicate. + comm_count = migrateCount( local_grid, pos_src, test_halo_width ); + } + + if ( force_comm || comm_count > 0 ) + { + auto distributor = + Cabana::createGridDistributor( local_grid, pos_src ); + Cabana::AoSoA data_dst( + "data_dst", distributor.totalNumImport() ); + auto pos_dst = Cabana::slice<1>( data_dst ); + Cabana::migrate( distributor, pos_src, pos_dst ); + + data_host.resize( data_dst.size() ); + Cabana::deep_copy( data_host, data_dst ); + } + else + { + data_host.resize( data_src.size() ); + Cabana::deep_copy( data_host, data_src ); + } + } + + // Check the results. + int new_num_data = data_host.size(); + auto pos_host = Cabana::slice<1>( data_host ); + + for ( int i = 0; i < new_num_data; ++i ) + { + // Make sure particles haven't moved if within allowable halo mesh and + // migrate is not being forced. 
+        if ( !force_comm && test_halo_width < halo_width )
+        {
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::I ),
+                              pos_initial( i, Cajita::Dim::I ) );
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::J ),
+                              pos_initial( i, Cajita::Dim::J ) );
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::K ),
+                              pos_initial( i, Cajita::Dim::K ) );
+        }
+        else
+        {
+            // Make sure everything was wrapped into the global domain.
+            EXPECT_LE( pos_host( i, Cajita::Dim::I ), test_data.global_hi_x );
+            EXPECT_LE( pos_host( i, Cajita::Dim::J ), test_data.global_hi_y );
+            EXPECT_LE( pos_host( i, Cajita::Dim::K ), test_data.global_hi_z );
+            EXPECT_GE( pos_host( i, Cajita::Dim::I ), test_data.global_lo_x );
+            EXPECT_GE( pos_host( i, Cajita::Dim::J ), test_data.global_lo_y );
+            EXPECT_GE( pos_host( i, Cajita::Dim::K ), test_data.global_lo_z );
+
+            // Make sure everything was wrapped into the local domain.
+            EXPECT_LE( pos_host( i, Cajita::Dim::I ), test_data.hi_x );
+            EXPECT_LE( pos_host( i, Cajita::Dim::J ), test_data.hi_y );
+            EXPECT_LE( pos_host( i, Cajita::Dim::K ), test_data.hi_z );
+            EXPECT_GE( pos_host( i, Cajita::Dim::I ), test_data.lo_x );
+            EXPECT_GE( pos_host( i, Cajita::Dim::J ), test_data.lo_y );
+            EXPECT_GE( pos_host( i, Cajita::Dim::K ), test_data.lo_z );
+        }
+    }
+
+    // Make sure the particles are still unique.
+    int final_total = 0;
+    int initial_total = 0;
+    MPI_Allreduce( &new_num_data, &final_total, 1, MPI_INT, MPI_SUM,
+                   MPI_COMM_WORLD );
+    MPI_Allreduce( &num_data, &initial_total, 1, MPI_INT, MPI_SUM,
+                   MPI_COMM_WORLD );
+    EXPECT_EQ( final_total, initial_total );
+}
+
+//---------------------------------------------------------------------------//
+void testGather( const int halo_width, const int test_halo_width,
+                 const int test_type )
+{
+    PGCommTestData test_data( false, halo_width );
+    auto data_host = test_data.data_host;
+    auto local_grid = *( test_data.local_grid );
+    int num_data = test_data.num_data;
+
+    using DataTypes = Cabana::MemberTypes<int, double[3]>;
+    Cabana::AoSoA<DataTypes, Kokkos::HostSpace> initial( "initial", num_data );
+    Cabana::deep_copy( initial, data_host );
+    auto pos_initial = Cabana::slice<1>( initial );
+
+    // Copy to the device.
+    Cabana::AoSoA<DataTypes, TEST_MEMSPACE> data_src( "data_src", num_data );
+    Cabana::deep_copy( data_src, data_host );
+
+    if ( test_type == 0 )
+    {
+        // Do the gather with an AoSoA.
+        auto grid_halo = Cabana::createGridHalo(
+            local_grid, data_src, std::integral_constant<std::size_t, 1>(),
+            test_halo_width );
+        gridGather( grid_halo, data_src );
+
+        data_host.resize( data_src.size() );
+        Cabana::deep_copy( data_host, data_src );
+    }
+    else if ( test_type == 1 )
+    {
+        // Create the halo with a slice.
+        auto pos_src = Cabana::slice<1>( data_src );
+        auto grid_halo =
+            Cabana::createGridHalo( local_grid, pos_src, test_halo_width );
+
+        // Resize (cannot resize a slice).
+        auto halo = grid_halo.getHalo();
+        data_src.resize( halo.numLocal() + halo.numGhost() );
+        pos_src = Cabana::slice<1>( data_src );
+
+        // Gather with the slice.
+        gridGather( grid_halo, pos_src );
+
+        data_host.resize( data_src.size() );
+        Cabana::deep_copy( data_host, data_src );
+    }
+
+    // Check the results.
+    int new_num_data = data_host.size();
+    auto pos_host = Cabana::slice<1>( data_host );
+
+    // Make sure owned particles haven't changed.
+    for ( int i = 0; i < num_data; ++i )
+    {
+        EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::I ),
+                          pos_initial( i, Cajita::Dim::I ) );
+        EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::J ),
+                          pos_initial( i, Cajita::Dim::J ) );
+        EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::K ),
+                          pos_initial( i, Cajita::Dim::K ) );
+    }
+    for ( int i = num_data; i < new_num_data; ++i )
+    {
+        // Make sure particles haven't moved if within the allowed halo width.
+        if ( test_halo_width < halo_width )
+        {
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::I ),
+                              pos_initial( i, Cajita::Dim::I ) );
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::J ),
+                              pos_initial( i, Cajita::Dim::J ) );
+            EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::K ),
+                              pos_initial( i, Cajita::Dim::K ) );
+        }
+        else
+        {
+            bool within_x = true;
+            bool within_y = true;
+            bool within_z = true;
+            // Make sure all ghosts are in the halo region in at least one
+            // direction.
+            if ( pos_host( i, Cajita::Dim::I ) < test_data.lo_x )
+            {
+                EXPECT_LE( pos_host( i, Cajita::Dim::I ), test_data.lo_x );
+                EXPECT_GE( pos_host( i, Cajita::Dim::I ),
+                           test_data.lo_x - test_data.ghost_x );
+            }
+            else if ( pos_host( i, Cajita::Dim::I ) > test_data.hi_x )
+            {
+                EXPECT_GE( pos_host( i, Cajita::Dim::I ), test_data.hi_x );
+                EXPECT_LE( pos_host( i, Cajita::Dim::I ),
+                           test_data.hi_x + test_data.ghost_x );
+            }
+            else
+            {
+                within_x = false;
+            }
+            if ( pos_host( i, Cajita::Dim::J ) < test_data.lo_y )
+            {
+                EXPECT_LE( pos_host( i, Cajita::Dim::J ), test_data.lo_y );
+                EXPECT_GE( pos_host( i, Cajita::Dim::J ),
+                           test_data.lo_y - test_data.ghost_y );
+            }
+            else if ( pos_host( i, Cajita::Dim::J ) > test_data.hi_y )
+            {
+                EXPECT_GE( pos_host( i, Cajita::Dim::J ), test_data.hi_y );
+                EXPECT_LE( pos_host( i, Cajita::Dim::J ),
+                           test_data.hi_y + test_data.ghost_y );
+            }
+            else
+            {
+                within_y = false;
+            }
+            if ( pos_host( i, Cajita::Dim::K ) < test_data.lo_z )
+            {
+                EXPECT_LE( pos_host( i, Cajita::Dim::K ), test_data.lo_z );
+                EXPECT_GE( pos_host( i, Cajita::Dim::K ),
+                           test_data.lo_z - test_data.ghost_z );
+            }
+            else if ( pos_host( i, Cajita::Dim::K ) > test_data.hi_z )
+            {
+                EXPECT_GE( pos_host( i, Cajita::Dim::K ), test_data.hi_z );
+                EXPECT_LE( pos_host( i, Cajita::Dim::K ),
+                           test_data.hi_z + test_data.ghost_z );
+            }
+            else
+            {
+                within_z = false;
+            }
+            if ( !within_x && !within_y && !within_z )
+            {
+                FAIL() << "Ghost particle outside ghost region: "
+                       << pos_host( i, 0 ) << " " << pos_host( i, 1 ) << " "
+                       << pos_host( i, 2 );
+            }
+        }
+    }
+
+    // TODO: check number of ghosts created.
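+    // One possible way to address this TODO (a sketch only, based on how
+    // PGCommTestData seeds particles ( halo_width - 0.1 ) cells inside the
+    // owned boundary): when test_halo_width < halo_width no ghosts should be
+    // created, so new_num_data could be compared against num_data; otherwise
+    // the per-rank ghost count ( new_num_data - num_data ) could be checked
+    // against the halo size, e.g. via grid_halo.getHalo().numGhost() inside
+    // the branches above where grid_halo is in scope.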
+}
+
+//---------------------------------------------------------------------------//
+// RUN TESTS
+//---------------------------------------------------------------------------//
+
+TEST( TEST_CATEGORY, periodic_test_migrate_aosoa )
+{
+    // Call with varied halo width and test halo width, without forced
+    // communication.
+    for ( int i = 1; i < 4; i++ )
+        for ( int j = 0; j < 4; j++ )
+            testMigrate( i, j, false, 0 );
+
+    // Call with forced communication.
+    testMigrate( 1, 1, true, 0 );
+}
+
+TEST( TEST_CATEGORY, periodic_test_migrate_aosoa_separate )
+{
+    for ( int i = 1; i < 4; i++ )
+        for ( int j = 1; j < 4; j++ )
+            testMigrate( i, j, false, 1 );
+
+    testMigrate( 1, 1, true, 1 );
+}
+
+TEST( TEST_CATEGORY, periodic_test_migrate_slice )
+{
+    for ( int i = 1; i < 4; i++ )
+        for ( int j = 1; j < 4; j++ )
+            testMigrate( i, j, false, 2 );
+
+    testMigrate( 1, 1, true, 2 );
+}
+
+TEST( TEST_CATEGORY, periodic_test_gather_aosoa )
+{
+    for ( int i = 1; i < 4; i++ )
+        for ( int j = 1; j < 4; j++ )
+            testGather( i, j, 0 );
+}
+
+TEST( TEST_CATEGORY, periodic_test_gather_slice )
+{
+    for ( int i = 1; i < 4; i++ )
+        for ( int j = 1; j < 4; j++ )
+            testGather( i, j, 1 );
+}
+//---------------------------------------------------------------------------//
+
+} // end namespace Test