diff --git a/core/src/Cabana_CommunicationPlan.hpp b/core/src/Cabana_CommunicationPlan.hpp
index bdb8dac3e..2a71e3bf7 100644
--- a/core/src/Cabana_CommunicationPlan.hpp
+++ b/core/src/Cabana_CommunicationPlan.hpp
@@ -30,6 +30,58 @@ namespace Cabana
 {
 namespace Impl
 {
+// Put this rank first in the topology.
+inline void selfFirstTopology( std::vector<int>& topology )
+{
+    int my_rank = -1;
+    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
+    for ( auto& n : topology )
+    {
+        if ( n == my_rank )
+        {
+            std::swap( n, topology[0] );
+            break;
+        }
+    }
+}
+
+// Sort and make the neighbor topology unique, with this rank first.
+inline std::vector<int> getUniqueTopology( std::vector<int> topology )
+{
+    auto remove_end = std::remove( topology.begin(), topology.end(), -1 );
+    std::sort( topology.begin(), remove_end );
+    auto unique_end = std::unique( topology.begin(), remove_end );
+    topology.resize( std::distance( topology.begin(), unique_end ) );
+
+    selfFirstTopology( topology );
+    return topology;
+}
+
+// View overload: copy to a std::vector and make unique as above.
+inline std::vector<int>
+getUniqueTopology( Kokkos::View<int*, Kokkos::HostSpace> topology )
+{
+    std::vector<int> unique_topology( topology.extent( 0 ) );
+    for ( std::size_t n = 0; n < topology.extent( 0 ); ++n )
+        unique_topology[n] = topology( n );
+
+    return getUniqueTopology( unique_topology );
+}
+
+inline auto
+getUniqueTopologyView( Kokkos::View<int*, Kokkos::HostSpace> topology )
+{
+    std::vector<int> unique_topology = getUniqueTopology( topology );
+
+    Kokkos::View<int*, Kokkos::HostSpace> unique_view(
+        "unique_neighbors", unique_topology.size() );
+    for ( std::size_t n = 0; n < unique_topology.size(); ++n )
+        unique_view( n ) = unique_topology[n];
+
+    return unique_view;
+}
+
 //---------------------------------------------------------------------------//
 // Count sends and create steering algorithm tags.
 struct CountSendsAndCreateSteeringDuplicated
@@ -456,19 +508,16 @@ class CommunicationPlan
       this rank, just use this rank as the export destination and the data
       will be efficiently migrated.
     */
-    template <class ViewType>
+    template <class ViewType, class NeighType>
     Kokkos::View<size_type*, memory_space>
     createFromExportsAndTopology( const ViewType& element_export_ranks,
-                                  const std::vector<int>& neighbor_ranks )
+                                  const NeighType& neighbor_ranks )
     {
         // Store the number of export elements.
         _num_export_element = element_export_ranks.size();
 
-        // Store the unique neighbors.
-        _neighbors = neighbor_ranks;
-        std::sort( _neighbors.begin(), _neighbors.end() );
-        auto unique_end = std::unique( _neighbors.begin(), _neighbors.end() );
-        _neighbors.resize( std::distance( _neighbors.begin(), unique_end ) );
+        // Store the unique neighbors (this rank first).
+        _neighbors = Impl::getUniqueTopology( neighbor_ranks );
         int num_n = _neighbors.size();
 
         // Get the size of this communicator.
@@ -483,15 +532,6 @@ class CommunicationPlan
         // communication space so any mpi tag will do.
         const int mpi_tag = 1221;
 
-        // If we are sending to ourself put that one first in the neighbor
-        // list.
-        for ( auto& n : _neighbors )
-            if ( n == my_rank )
-            {
-                std::swap( n, _neighbors[0] );
-                break;
-            }
-
         // Initialize import/export sizes.
         _num_export.assign( num_n, 0 );
         _num_import.assign( num_n, 0 );
diff --git a/core/src/Cabana_Distributor.hpp b/core/src/Cabana_Distributor.hpp
index 82479d2fa..b9eeb3c83 100644
--- a/core/src/Cabana_Distributor.hpp
+++ b/core/src/Cabana_Distributor.hpp
@@ -70,6 +70,9 @@ class Distributor : public CommunicationPlan<DeviceType>
   \tparam ViewType The container type for the export element ranks. This
   container type can be either a Kokkos View or a Cabana Slice.
 
+  \tparam NeighType The container type for the neighbor ranks. This
+  container type can be a Kokkos View or a std::vector.
+
   \param comm The MPI communicator over which the distributor is
   defined.
 
   \param element_export_ranks The destination rank in the target
@@ -93,9 +96,9 @@ class Distributor : public CommunicationPlan<DeviceType>
   this rank, just use this rank as the export destination and the data
   will be efficiently migrated.
 */
-    template <class ViewType>
+    template <class ViewType, class NeighType>
     Distributor( MPI_Comm comm, const ViewType& element_export_ranks,
-                 const std::vector<int>& neighbor_ranks )
+                 const NeighType& neighbor_ranks )
         : CommunicationPlan<DeviceType>( comm )
     {
         auto neighbor_ids = this->createFromExportsAndTopology(
diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp
index 23746cf20..76775d80c 100644
--- a/core/src/Cabana_Halo.hpp
+++ b/core/src/Cabana_Halo.hpp
@@ -21,6 +21,7 @@
 #include <Cabana_CommunicationPlan.hpp>
 #include <Cabana_Slice.hpp>
+#include <Cabana_Tuple.hpp>
 #include <Kokkos_Core.hpp>
 
 namespace Cabana
@@ -72,6 +73,9 @@ class Halo : public CommunicationPlan<DeviceType>
   ranks. This container type can be either a Kokkos View or a Cabana
   Slice.
 
+  \tparam NeighType The container type for the neighbor ranks. This
+  container type can be a Kokkos View or a std::vector.
+
   \param comm The MPI communicator over which the halo is defined.
 
   \param num_local The number of locally-owned elements on this rank.
@@ -97,11 +101,11 @@ class Halo : public CommunicationPlan<DeviceType>
   \note Calling this function completely updates the state of this
   object and invalidates the previous state.
 */
-    template <class IdViewType, class RankViewType>
+    template <class IdViewType, class RankViewType, class NeighType>
     Halo( MPI_Comm comm, const std::size_t num_local,
           const IdViewType& element_export_ids,
           const RankViewType& element_export_ranks,
-          const std::vector<int>& neighbor_ranks )
+          const NeighType& neighbor_ranks )
         : CommunicationPlan<DeviceType>( comm )
         , _num_local( num_local )
     {
@@ -199,6 +203,19 @@ struct is_halo : public is_halo_impl<typename std::remove_cv<T>::type>::type
 {
 };
 
+template <class T, class SFINAE = void>
+struct is_callable : public std::false_type
+{
+};
+
+template <class T>
+struct is_callable<
+    T, typename std::enable_if<
+           std::is_same<decltype( &T::operator() ),
+                        decltype( &T::operator() )>::value>::type>
+    : std::true_type
+{
+};
+
 namespace Impl
 {
 
@@ -229,9 +246,9 @@ void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer )
     Kokkos::fence();
 }
 
-template <class Halo_t, class AoSoA_t, class View_t, class Modify_t>
-void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer,
-                 const Modify_t& modify_functor )
+template <class Halo_t, class AoSoA_t, class View_t>
+void sendModifiedBuffer( const Halo_t& halo, AoSoA_t& aosoa,
+                         View_t& send_buffer )
 {
     // Get the steering vector for the sends.
     auto steering = halo.getExportSteering();
@@ -241,7 +258,7 @@ void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer,
     auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i )
     {
         send_buffer( i ) = aosoa.getTuple( steering( i ) );
-        modify_functor( send_buffer, i );
+        halo( send_buffer, i );
     };
     Kokkos::RangePolicy<typename Halo_t::execution_space>
         gather_send_buffer_policy( 0, halo.totalNumExport() );
@@ -335,28 +352,28 @@ void recvBuffer( const Halo_t& halo, AoSoA_t& aosoa, const View_t& send_buffer )
   element in the locally owned decomposition will be the value assigned to
   the element in the ghosted decomposition.
 
-  \tparam Halo_t Halo type - must be a Halo.
+  \tparam Halo_t Halo type - must be derived from Halo, with a functor
+  defined to modify the send buffer.
 
   \tparam AoSoA_t AoSoA type - must be an AoSoA.
 
-  \tparam Modify_t Buffer modification type.
-
-  \param halo The halo to use for the gather.
+  \param halo The halo to use for the gather and send buffer modification.
 
   \param aosoa The AoSoA on which to perform the gather. The AoSoA should
   have a size equivalent to halo.numGhost() + halo.numLocal(). The locally
   owned elements are expected to appear first (i.e.
  in the first halo.numLocal() elements) and the ghosted elements are
   expected to appear second (i.e. in the next halo.numGhost() elements).
-
-  \param modify_functor Class containing functor to modify the send buffer
-  before it's sent (e.g. for periodic coordinate update).
 */
-template <class Halo_t, class AoSoA_t, class Modify_t>
-void gather( const Halo_t& halo, AoSoA_t& aosoa, const Modify_t& modify_functor,
-             typename std::enable_if<( is_halo<Halo_t>::value &&
-                                       is_aosoa<AoSoA_t>::value ),
-                                     int>::type* = 0 )
+template <class Halo_t, class AoSoA_t>
+void gather(
+    const Halo_t& halo, AoSoA_t& aosoa,
+    typename std::enable_if<
+        ( !is_halo<Halo_t>::value && // TODO: Should not be needed
+          std::is_base_of<Halo<typename Halo_t::device_type>,
+                          Halo_t>::value &&
+          // is_callable<Halo_t>::value &&
+          is_aosoa<AoSoA_t>::value ),
+        int>::type* = 0 )
 {
     Impl::checkSize( halo, aosoa );
 
@@ -366,7 +383,7 @@ void gather( const Halo_t& halo, AoSoA_t& aosoa, const Modify_t& modify_functor,
         Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ),
         halo.totalNumExport() );
 
-    Impl::sendBuffer( halo, aosoa, send_buffer, modify_functor );
+    Impl::sendModifiedBuffer( halo, aosoa, send_buffer );
     Impl::recvBuffer( halo, aosoa, send_buffer );
 }
 
diff --git a/core/src/Cabana_ParticleGridCommunication.hpp b/core/src/Cabana_ParticleGridCommunication.hpp
index 166b70f28..f6e52a81e 100644
--- a/core/src/Cabana_ParticleGridCommunication.hpp
+++ b/core/src/Cabana_ParticleGridCommunication.hpp
@@ -23,38 +23,26 @@
 #include <Cajita.hpp>
 
-#include <algorithm>
-
 namespace Cabana
 {
 namespace Impl
 {
+
 //---------------------------------------------------------------------------//
 // Of the 27 potential local grids figure out which are in our topology.
 // Some of the ranks in this list may be invalid. This needs to be updated
 // after computing destination ranks to only contain valid ranks.
 template <class LocalGridType>
-std::vector<int> getTopology( const LocalGridType& local_grid )
+auto getTopology( const LocalGridType& local_grid )
 {
-    std::vector<int> topology( 27, -1 );
+    Kokkos::View<int*, Kokkos::HostSpace> topology(
+        Kokkos::ViewAllocateWithoutInitializing( "neighbors" ), 27 );
     int nr = 0;
     for ( int k = -1; k < 2; ++k )
         for ( int j = -1; j < 2; ++j )
             for ( int i = -1; i < 2; ++i, ++nr )
-                topology[nr] = local_grid.neighborRank( i, j, k );
-    return topology;
-}
-
-//---------------------------------------------------------------------------//
-// Make the topology a list of unique and valid ranks. They must all be valid,
-// but not necessarily unique (enforced within CommunicationPlan).
-inline std::vector<int> getUniqueTopology( std::vector<int> topology )
-{
-    auto remove_end = std::remove( topology.begin(), topology.end(), -1 );
-    std::sort( topology.begin(), remove_end );
-    auto unique_end = std::unique( topology.begin(), remove_end );
-    topology.resize( std::distance( topology.begin(), unique_end ) );
+                topology( nr ) = local_grid.neighborRank( i, j, k );
     return topology;
 }
 
@@ -238,7 +226,7 @@ void getMigrateDestinations( const LocalGridType& local_grid,
 //---------------------------------------------------------------------------//
 /*!
-  \brief gridDistributor determines which data should be migrated from one
+  \brief Determine which data should be migrated from one
   uniquely-owned decomposition to another uniquely-owned decomposition, using
   bounds of a Cajita grid and taking periodic boundaries into account.
 
@@ -261,35 +249,31 @@ gridDistributor( const LocalGridType& local_grid, PositionSliceType& positions )
     // Get all 26 neighbor ranks.
    auto topology = Impl::getTopology( local_grid );
-
-    Kokkos::View<int*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>
-        neighbor_ranks( topology.data(), topology.size() );
-    auto nr_mirror =
-        Kokkos::create_mirror_view_and_copy( device_type(), neighbor_ranks );
+    auto topology_mirror =
+        Kokkos::create_mirror_view_and_copy( device_type(), topology );
 
     Kokkos::View<int*, device_type> destinations(
         Kokkos::ViewAllocateWithoutInitializing( "destinations" ),
         positions.size() );
     // Determine destination ranks for all particles and wrap positions across
     // periodic boundaries.
-    Impl::getMigrateDestinations( local_grid, nr_mirror, destinations,
+    Impl::getMigrateDestinations( local_grid, topology_mirror, destinations,
                                   positions );
 
-    // Ensure neighbor ranks are valid and unique.
-    auto unique_topology = Impl::getUniqueTopology( topology );
-
     // Create the Cabana distributor.
     Distributor<device_type> distributor( local_grid.globalGrid().comm(),
-                                          destinations, unique_topology );
+                                          destinations, topology );
     return distributor;
 }
 
 //---------------------------------------------------------------------------//
 /*!
-  \brief gridMigrate migrates data from one uniquely-owned decomposition to
-  another uniquely-owned decomposition, using the bounds and periodic boundaries
-  of a Cajita grid to determine which particles should be moved. In-place
-  variant.
+  \brief Migrate data from one uniquely-owned decomposition to another
+  uniquely-owned decomposition, using the bounds and periodic boundaries of a
+  Cajita grid to determine which particles should be moved. In-place variant.
 
   \tparam LocalGridType Cajita LocalGrid type.
 
@@ -341,9 +325,9 @@ void gridMigrate( const LocalGridType& local_grid, ParticleContainer& particles,
 //---------------------------------------------------------------------------//
 /*!
-  \brief gridMigrate migrates data from one uniquely-owned decomposition to
-  another uniquely-owned decomposition, using the bounds and periodic boundaries
-  of a Cajita grid to determine which particles should be moved. Separate AoSoA
+  \brief Migrate data from one uniquely-owned decomposition to another
+  uniquely-owned decomposition, using the bounds and periodic boundaries of a
+  Cajita grid to determine which particles should be moved. Separate AoSoA
   variant.
 
   \tparam LocalGridType Cajita LocalGrid type.
 
@@ -410,166 +394,285 @@ void gridMigrate( const LocalGridType& local_grid,
 namespace Impl
 {
 
-//---------------------------------------------------------------------------//
-// Locate particles within the local grid and determine if any from this rank
-// need to be ghosted to one (or more) of the 26 neighbor ranks, keeping track
-// of destination rank, index in the container, and periodic shift needed (but
-// not yet applied).
+
 template <class LocalGridType, class PositionSliceType, class CountView,
-          class DestinationRankView, class ShiftRankView>
-void getHaloIds( const LocalGridType& local_grid, CountView& send_count,
-                 DestinationRankView& destinations, DestinationRankView& ids,
-                 ShiftRankView& shifts, const PositionSliceType& positions,
-                 const int minimum_halo_width )
+          class DestinationRankView, class ShiftViewType>
+struct HaloIds
 {
-    using execution_space = typename PositionSliceType::execution_space;
-
-    // Check within the halo width, within the local domain.
-    const auto& global_grid = local_grid.globalGrid();
-    const Kokkos::Array<bool, 3> periodic = {
-        global_grid.isPeriodic( Cajita::Dim::I ),
-        global_grid.isPeriodic( Cajita::Dim::J ),
-        global_grid.isPeriodic( Cajita::Dim::K ) };
-    auto dx = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::I );
-    auto dy = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::J );
-    auto dz = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::K );
-    const auto& global_mesh = global_grid.globalMesh();
-    const Kokkos::Array<double, 3> global_low = {
-        global_mesh.lowCorner( Cajita::Dim::I ) + minimum_halo_width * dx,
-        global_mesh.lowCorner( Cajita::Dim::J ) + minimum_halo_width * dy,
-        global_mesh.lowCorner( Cajita::Dim::K ) + minimum_halo_width * dz };
-    const Kokkos::Array<double, 3> global_high = {
-        global_mesh.highCorner( Cajita::Dim::I ) - minimum_halo_width * dx,
-        global_mesh.highCorner( Cajita::Dim::J ) - minimum_halo_width * dy,
-        global_mesh.highCorner( Cajita::Dim::K ) - minimum_halo_width * dz };
-    const Kokkos::Array<double, 3> global_extent = {
-        global_mesh.extent( Cajita::Dim::I ),
-        global_mesh.extent( Cajita::Dim::J ),
-        global_mesh.extent( Cajita::Dim::K ) };
-    const auto& local_mesh =
-        Cajita::createLocalMesh<typename PositionSliceType::device_type>(
-            local_grid );
-
-    int my_rank = -1;
-    MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );
-
-    auto policy = Kokkos::RangePolicy<execution_space>( 0, positions.size() );
+    Kokkos::Array<bool, 3> _periodic;
+    Kokkos::Array<double, 3> _global_low;
+    Kokkos::Array<double, 3> _global_high;
+    Kokkos::Array<double, 3> _global_extent;
+
+    int _min_halo;
+    int _neighbor_rank;
+
+    CountView _send_count;
+    DestinationRankView _destinations;
+    DestinationRankView _ids;
+    ShiftViewType _shifts;
+    PositionSliceType _positions;
+
+    Kokkos::Array<int, 3> _ijk;
+    Kokkos::Array<double, 3> _min_coord;
+    Kokkos::Array<double, 3> _max_coord;
+
+    HaloIds( const LocalGridType& local_grid,
+             const PositionSliceType& positions, CountView& send_count,
+             DestinationRankView& destinations, DestinationRankView& ids,
+             ShiftViewType& shifts, const int minimum_halo_width )
+    {
+        _send_count = send_count;
+        _destinations = destinations;
+        _ids = ids;
+        _shifts = shifts;
+        _positions = positions;
+
+        // Check within the halo width, within the local domain.
+        const auto& global_grid = local_grid.globalGrid();
+        _periodic = { global_grid.isPeriodic( Cajita::Dim::I ),
+                      global_grid.isPeriodic( Cajita::Dim::J ),
+                      global_grid.isPeriodic( Cajita::Dim::K ) };
+        auto dx =
+            local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::I );
+        auto dy =
+            local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::J );
+        auto dz =
+            local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::K );
+        const auto& global_mesh = global_grid.globalMesh();
+        _global_low = {
+            global_mesh.lowCorner( Cajita::Dim::I ) + minimum_halo_width * dx,
+            global_mesh.lowCorner( Cajita::Dim::J ) + minimum_halo_width * dy,
+            global_mesh.lowCorner( Cajita::Dim::K ) + minimum_halo_width * dz };
+        _global_high = {
+            global_mesh.highCorner( Cajita::Dim::I ) - minimum_halo_width * dx,
+            global_mesh.highCorner( Cajita::Dim::J ) - minimum_halo_width * dy,
+            global_mesh.highCorner( Cajita::Dim::K ) -
+                minimum_halo_width * dz };
+        _global_extent = { global_mesh.extent( Cajita::Dim::I ),
+                           global_mesh.extent( Cajita::Dim::J ),
+                           global_mesh.extent( Cajita::Dim::K ) };
+
+        _min_halo = minimum_halo_width;
+
+        build( local_grid );
+    }
 
-    // Add a ghost if this particle is near the local boundary, potentially
-    // for each of the 26 neighbors cells. Do this one neighbor rank at a time
-    // so that sends are contiguous.
-    auto topology = getTopology( local_grid );
-    auto unique_topology = getUniqueTopology( topology );
-    // Put this rank first to make sure shifts match CommPlan export_ranks.
-    for ( auto& n : unique_topology )
+    KOKKOS_INLINE_FUNCTION void operator()( const int p ) const
     {
-        if ( n == my_rank )
+        Kokkos::Array<double, 3> pos = { _positions( p, Cajita::Dim::I ),
+                                         _positions( p, Cajita::Dim::J ),
+                                         _positions( p, Cajita::Dim::K ) };
+
+        // Check if the particle is both in the owned space
+        // and the ghosted space of this neighbor (ignore
+        // the current cell).
+        if ( ( pos[Cajita::Dim::I] > _min_coord[Cajita::Dim::I] &&
+               pos[Cajita::Dim::I] < _max_coord[Cajita::Dim::I] ) &&
+             ( pos[Cajita::Dim::J] > _min_coord[Cajita::Dim::J] &&
+               pos[Cajita::Dim::J] < _max_coord[Cajita::Dim::J] ) &&
+             ( pos[Cajita::Dim::K] > _min_coord[Cajita::Dim::K] &&
+               pos[Cajita::Dim::K] < _max_coord[Cajita::Dim::K] ) )
         {
-            std::swap( n, unique_topology[0] );
-            break;
+            const std::size_t sc = _send_count()++;
+            // If the size of the arrays is exceeded, keep
+            // counting to resize and fill next.
+            if ( sc < _destinations.extent( 0 ) )
+            {
+                // Keep the destination MPI rank.
+                _destinations( sc ) = _neighbor_rank;
+                // Keep the particle ID.
+                _ids( sc ) = p;
+                // Determine if this ghost particle needs to
+                // be shifted through the periodic boundary.
+                for ( int d = 0; d < 3; ++d )
+                {
+                    _shifts( sc, d ) = 0.0;
+                    if ( _periodic[d] && _ijk[d] )
+                    {
+                        if ( pos[d] > _global_high[d] )
+                            _shifts( sc, d ) = -_global_extent[d];
+                        else if ( pos[d] < _global_low[d] )
+                            _shifts( sc, d ) = _global_extent[d];
+                    }
+                }
+            }
         }
     }
-    for ( std::size_t ar = 0; ar < unique_topology.size(); ar++ )
+
+    //---------------------------------------------------------------------------//
+    // Locate particles within the local grid and determine if any from this
+    // rank need to be ghosted to one (or more) of the 26 neighbor ranks,
+    // keeping track of destination rank, index in the container, and periodic
+    // shift needed (but not yet applied).
+    void build( const LocalGridType& local_grid )
     {
-        // TODO: possible grid_parallel_for
-        int nr = 0;
-        for ( int k = -1; k < 2; ++k )
-            for ( int j = -1; j < 2; ++j )
-                for ( int i = -1; i < 2; ++i, ++nr )
+        using execution_space = typename PositionSliceType::execution_space;
+        const auto& local_mesh =
+            Cajita::createLocalMesh<typename PositionSliceType::device_type>(
+                local_grid );
+
+        auto policy =
+            Kokkos::RangePolicy<execution_space>( 0, _positions.size() );
+
+        // Add a ghost if this particle is near the local boundary,
+        // potentially for each of the 26 neighbor cells. Do this one
+        // neighbor rank at a time so that sends are contiguous.
+        auto topology = getTopology( local_grid );
+        auto unique_topology = getUniqueTopologyView( topology );
+        for ( std::size_t ar = 0; ar < unique_topology.size(); ar++ )
+        {
+            int nr = 0;
+            for ( int k = -1; k < 2; ++k )
+            {
+                for ( int j = -1; j < 2; ++j )
                 {
-                    const int neighbor_rank = topology.at( nr );
-                    if ( neighbor_rank == unique_topology.at( ar ) )
+                    for ( int i = -1; i < 2; ++i, ++nr )
                     {
-                        auto sis = local_grid.sharedIndexSpace(
-                            Cajita::Own(), Cajita::Cell(), i, j, k,
-                            minimum_halo_width );
-                        const int min_ind_i = sis.min( Cajita::Dim::I );
-                        const int min_ind_j = sis.min( Cajita::Dim::J );
-                        const int min_ind_k = sis.min( Cajita::Dim::K );
-                        Kokkos::Array<int, 3> min_ind = { min_ind_i, min_ind_j,
-                                                          min_ind_k };
-                        const int max_ind_i = sis.max( Cajita::Dim::I ) + 1;
-                        const int max_ind_j = sis.max( Cajita::Dim::J ) + 1;
-                        const int max_ind_k = sis.max( Cajita::Dim::K ) + 1;
-                        Kokkos::Array<int, 3> max_ind = { max_ind_i, max_ind_j,
-                                                          max_ind_k };
-
-                        Kokkos::Array<double, 3> min_coord;
-                        Kokkos::Array<double, 3> max_coord;
-                        local_mesh.coordinates( Cajita::Node(), min_ind.data(),
-                                                min_coord.data() );
-                        local_mesh.coordinates( Cajita::Node(), max_ind.data(),
-                                                max_coord.data() );
-
-                        auto halo_ids_func = KOKKOS_LAMBDA( const int p )
+                        if ( i != 0 || j != 0 || k != 0 )
                         {
-                            Kokkos::Array<double, 3> pos = {
-                                positions( p, Cajita::Dim::I ),
-                                positions( p, Cajita::Dim::J ),
-                                positions( p, Cajita::Dim::K ) };
-
-                            // Check the if particle is both in the owned space
-                            // and the ghosted space of this neighbor (ignore
-                            // the current cell).
-                            if ( ( pos[Cajita::Dim::I] >
-                                       min_coord[Cajita::Dim::I] &&
-                                   pos[Cajita::Dim::I] <
-                                       max_coord[Cajita::Dim::I] ) &&
-                                 ( pos[Cajita::Dim::J] >
-                                       min_coord[Cajita::Dim::J] &&
-                                   pos[Cajita::Dim::J] <
-                                       max_coord[Cajita::Dim::J] ) &&
-                                 ( pos[Cajita::Dim::K] >
-                                       min_coord[Cajita::Dim::K] &&
-                                   pos[Cajita::Dim::K] <
-                                       max_coord[Cajita::Dim::K] ) &&
-                                 ( i != 0 || j != 0 || k != 0 ) )
+                            _neighbor_rank = topology( nr );
+                            if ( _neighbor_rank == unique_topology( ar ) )
                             {
-                                const std::size_t sc = send_count()++;
-                                // If the size of the arrays is exceeded, keep
-                                // counting to resize and fill next.
-                                if ( sc < destinations.extent( 0 ) )
-                                {
-                                    // Keep the destination MPI rank.
-                                    destinations( sc ) = neighbor_rank;
-                                    // Keep the particle ID.
-                                    ids( sc ) = p;
-
-                                    // Determine if this ghost particle needs to
-                                    // be shifted through the periodic boundary.
-                                    const Kokkos::Array<int, 3> ijk = { i, j,
-                                                                        k };
-                                    for ( int d = 0; d < 3; ++d )
-                                    {
-                                        shifts( sc, d ) = 0.0;
-                                        if ( periodic[d] && ijk[d] )
-                                        {
-                                            if ( pos[d] > global_high[d] )
-                                                shifts( sc, d ) =
-                                                    -global_extent[d];
-                                            else if ( pos[d] < global_low[d] )
-                                                shifts( sc, d ) =
-                                                    global_extent[d];
-                                        }
-                                    }
-                                }
+                                auto sis = local_grid.sharedIndexSpace(
+                                    Cajita::Own(), Cajita::Cell(), i, j, k,
+                                    _min_halo );
+                                const int min_ind_i = sis.min( Cajita::Dim::I );
+                                const int min_ind_j = sis.min( Cajita::Dim::J );
+                                const int min_ind_k = sis.min( Cajita::Dim::K );
+                                Kokkos::Array<int, 3> min_ind = {
+                                    min_ind_i, min_ind_j, min_ind_k };
+                                const int max_ind_i =
+                                    sis.max( Cajita::Dim::I ) + 1;
+                                const int max_ind_j =
+                                    sis.max( Cajita::Dim::J ) + 1;
+                                const int max_ind_k =
+                                    sis.max( Cajita::Dim::K ) + 1;
+                                Kokkos::Array<int, 3> max_ind = {
+                                    max_ind_i, max_ind_j, max_ind_k };
+
+                                local_mesh.coordinates( Cajita::Node(),
+                                                        min_ind.data(),
+                                                        _min_coord.data() );
+                                local_mesh.coordinates( Cajita::Node(),
+                                                        max_ind.data(),
+                                                        _max_coord.data() );
+                                _ijk = { i, j, k };
+
+                                Kokkos::parallel_for( "get_halo_ids", policy,
+                                                      *this );
+                                Kokkos::fence();
+                            }
                             }
-                        };
-                        Kokkos::parallel_for( "get_halo_ids", policy,
-                                              halo_ids_func );
-                        Kokkos::fence();
                     }
                 }
             }
         }
+    }
-    // Shift periodic coordinates in send buffers.
-}
 
+    void rebuild( const LocalGridType& local_grid )
+    {
+        // Resize views to actual send sizes.
+        int dest_size = _destinations.extent( 0 );
+        int dest_count = 0;
+        Kokkos::deep_copy( dest_count, _send_count );
+        if ( dest_count != dest_size )
+        {
+            Kokkos::resize( _destinations, dest_count );
+            Kokkos::resize( _ids, dest_count );
+            Kokkos::resize( _shifts, dest_count, 3 );
+        }
+
+        // If original view sizes were exceeded, only counting was done so
+        // we need to rerun.
+        if ( dest_count > dest_size )
+        {
+            Kokkos::deep_copy( _send_count, 0 );
+            build( local_grid );
+        }
+    }
+};
 
 } // namespace Impl
 
 //---------------------------------------------------------------------------//
 /*!
-  \brief gridHalo determines which data should be ghosted on another
-  decomposition, using bounds of a Cajita grid and taking periodic boundaries
-  into account.
+  \class PeriodicHalo
+
+  \brief Halo communication plan including periodic boundary shifts.
+
+  \tparam DeviceType Device type for which the data for this class will be
+  allocated and where parallel execution occurs.
+
+  \tparam PositionIndex Particle position index within the AoSoA.
+*/
+template <class DeviceType, std::size_t PositionIndex>
+struct PeriodicHalo : public Halo<DeviceType>
+{
+    Kokkos::View<double**, DeviceType> _shifts;
+
+    /*!
+      \brief Constructor
+
+      \tparam IdViewType The container type for the export element ids.
+
+      \tparam RankViewType The container type for the export element ranks.
+
+      \tparam NeighViewType The container type for the neighbor ranks.
+
+      \tparam ShiftViewType The periodic shift Kokkos View type.
+
+      \param comm The MPI communicator over which the halo is defined.
+
+      \param num_local The number of locally-owned elements on this rank.
+
+      \param element_export_ids The local ids of the elements that will be
+      exported to other ranks to be used as ghosts.
+
+      \param element_export_ranks The ranks to which we will send each element
+      in element_export_ids.
+
+      \param neighbor_ranks List of ranks this rank will send to and receive
+      from.
+
+      \param shifts The periodic shifts for each element being sent.
+    */
+    template <class IdViewType, class RankViewType, class NeighViewType,
+              class ShiftViewType>
+    PeriodicHalo( MPI_Comm comm, const std::size_t num_local,
+                  const IdViewType& element_export_ids,
+                  const RankViewType& element_export_ranks,
+                  const NeighViewType& neighbor_ranks,
+                  const ShiftViewType shifts )
+        : Halo<DeviceType>( comm, num_local, element_export_ids,
+                            element_export_ranks, neighbor_ranks )
+        , _shifts( shifts )
+    {
+    }
+
+    /*!
+      \brief Modify the send buffer with periodic shifts.
+
+      \tparam ViewType The container type for the send buffer.
+
+      \param send_buffer Send buffer of positions being ghosted.
+
+      \param i Particle index.
+    */
+    template <class ViewType>
+    KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer,
+                                            const int i ) const
+    {
+        for ( int d = 0; d < 3; ++d )
+            get<PositionIndex>( send_buffer( i ), d ) += _shifts( i, d );
+    }
+};
+
+//---------------------------------------------------------------------------//
+/*!
+  \brief Determine which data should be ghosted on another decomposition,
+  using bounds of a Cajita grid and taking periodic boundaries into account.
+  Slice variant.
 
   \tparam LocalGridType Cajita LocalGrid type.
 
@@ -580,17 +683,23 @@ void getHaloIds( const LocalGridType& local_grid, CountView& send_count,
   \param positions The particle positions.
 
+  \param PositionIndex Particle position index within the AoSoA.
+
   \param min_halo_width Number of halo mesh widths to include for ghosting.
 
   \param max_export_guess The allocation size for halo export ranks, IDs,
   and periodic shifts
 
-  \return pair A std::pair containing the Halo and periodic shift array for
-  a later gather.
+  \return PeriodicHalo Halo containing periodic shift information.
 */
-template <class LocalGridType, class PositionSliceType>
-auto gridHalo( const LocalGridType& local_grid, PositionSliceType& positions,
-               const int min_halo_width, const int max_export_guess = 0 )
+template <class LocalGridType, class PositionSliceType,
+          std::size_t PositionIndex>
+auto gridHalo(
+    const LocalGridType& local_grid, const PositionSliceType& positions,
+    std::integral_constant<std::size_t, PositionIndex>,
+    const int min_halo_width, const int max_export_guess = 0,
+    typename std::enable_if<is_slice<PositionSliceType>::value, int>::type* =
+        0 )
 {
     using device_type = typename PositionSliceType::device_type;
     using pos_value = typename PositionSliceType::value_type;
 
@@ -598,97 +707,44 @@ auto gridHalo( const LocalGridType& local_grid, PositionSliceType& positions,
     // Get all 26 neighbor ranks.
     auto topology = Impl::getTopology( local_grid );
 
-    Kokkos::View<int*, device_type> destinations(
+    using DestinationRankView = typename Kokkos::View<int*, device_type>;
+    using ShiftViewType = typename Kokkos::View<double**, device_type>;
+    using CountView =
+        typename Kokkos::View<int, Kokkos::LayoutRight, device_type,
+                              Kokkos::MemoryTraits<Kokkos::Atomic>>;
+    DestinationRankView destinations(
         Kokkos::ViewAllocateWithoutInitializing( "destinations" ),
         max_export_guess );
-    Kokkos::View<int*, device_type> ids(
-        Kokkos::ViewAllocateWithoutInitializing( "ids" ), max_export_guess );
-    Kokkos::View<double**, device_type> shifts(
-        Kokkos::ViewAllocateWithoutInitializing( "shifts" ), max_export_guess,
-        3 );
-    Kokkos::View<int, Kokkos::LayoutRight, device_type,
-                 Kokkos::MemoryTraits<Kokkos::Atomic>>
-        send_count( "halo_send_count" );
+    DestinationRankView ids( Kokkos::ViewAllocateWithoutInitializing( "ids" ),
+                             max_export_guess );
+    ShiftViewType shifts( Kokkos::ViewAllocateWithoutInitializing( "shifts" ),
+                          max_export_guess, 3 );
+    CountView send_count( "halo_send_count" );
 
     // Determine which particles need to be ghosted to neighbors.
-    Impl::getHaloIds( local_grid, send_count, destinations, ids, shifts,
-                      positions, min_halo_width );
-
-    // Resize views to actual send sizes.
-    int dest_size = destinations.extent( 0 );
-    int dest_count = 0;
-    Kokkos::deep_copy( dest_count, send_count );
-    if ( dest_count != dest_size )
-    {
-        Kokkos::resize( destinations, dest_count );
-        Kokkos::resize( ids, dest_count );
-        Kokkos::resize( shifts, dest_count, 3 );
-    }
-
-    // If original view sizes were exceeded, only counting was done so we
-    // need to rerun.
-    if ( dest_count > dest_size )
-    {
-        Kokkos::deep_copy( send_count, 0 );
-        Impl::getHaloIds( local_grid, send_count, destinations, ids, shifts,
-                          positions, min_halo_width );
-    }
-
-    // Ensure neighbor ranks are valid and unique.
-    auto unique_topology = Impl::getUniqueTopology( topology );
+    auto halo_ids =
+        Impl::HaloIds<LocalGridType, PositionSliceType, CountView,
+                      DestinationRankView, ShiftViewType>(
+            local_grid, positions, send_count, destinations, ids, shifts,
+            min_halo_width );
+    // Rebuild if needed.
+    halo_ids.rebuild( local_grid );
 
     // Create the Cabana Halo.
-    auto halo =
-        Halo<device_type>( local_grid.globalGrid().comm(), positions.size(),
-                           ids, destinations, unique_topology );
-    return std::make_pair( halo, shifts );
+    auto halo = PeriodicHalo<device_type, PositionIndex>(
+        local_grid.globalGrid().comm(), positions.size(), ids, destinations,
+        topology, shifts );
+    return halo;
 }
 
 //---------------------------------------------------------------------------//
 /*!
-  \class PeriodicHalo
-
-  \brief Store information for periodic halo communication.
-*/
-template <class HaloType, class ShiftViewType, class ParticleContainer,
-          std::size_t PositionIndex>
-struct PeriodicHalo
-{
-    using TupleType = typename ParticleContainer::tuple_type;
-
-    const HaloType _halo;
-    const ShiftViewType _shifts;
-
-    /*!
-      \brief Constructor.
-
-      \param pair Pair of inputs containing Halo and periodic shift View.
-      This pair is returned by the gridHalo function.
-
-      \param PositionIndex Particle position index within the AoSoA.
-    */
-    PeriodicHalo( std::pair<HaloType, ShiftViewType> pair )
-        : _halo( pair.first )
-        , _shifts( pair.second )
-    {
-    }
-    ~PeriodicHalo() {}
-
-    auto getHalo() const { return _halo; }
-
-    template <class ViewType>
-    KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer,
-                                            const int i ) const
-    {
-        for ( int d = 0; d < 3; ++d )
-            get<PositionIndex>( send_buffer( i ), d ) += _shifts( i, d );
-    }
-};
+  \brief Determine which data should be ghosted on another decomposition,
+  using bounds of a Cajita grid and taking periodic boundaries into account.
+  AoSoA variant.
 
-//---------------------------------------------------------------------------//
-/*!
-  \brief Create a periodic halo.
+  \tparam LocalGridType Cajita LocalGrid type.
 
+  \tparam ParticleContainer AoSoA type.
 
   \param local_grid The local grid for creating halo and periodicity.
 
   \param particles The particle AoSoA, containing positions.
 
@@ -700,34 +756,28 @@ struct PeriodicHalo
   \param max_export_guess The allocation size for halo export ranks, IDs,
   and periodic shifts.
 
-  \return pair A std::pair containing the Halo and periodic shift array.
+  \return PeriodicHalo Halo containing periodic shift information.
*/
 template <class LocalGridType, class ParticleContainer,
           std::size_t PositionIndex>
-auto createPeriodicHalo( const LocalGridType& local_grid,
-                         const ParticleContainer& particles,
-                         std::integral_constant<std::size_t, PositionIndex>,
-                         const int min_halo_width,
-                         const int max_export_guess = 0 )
+auto gridHalo(
+    const LocalGridType& local_grid, const ParticleContainer& particles,
+    std::integral_constant<std::size_t, PositionIndex>,
+    const int min_halo_width, const int max_export_guess = 0,
+    typename std::enable_if<is_aosoa<ParticleContainer>::value, int>::type* =
+        0 )
 {
-    using view_type =
-        Kokkos::View<double**, typename ParticleContainer::device_type>;
-    using halo_type = Halo<typename ParticleContainer::device_type>;
-
     auto positions = slice<PositionIndex>( particles );
-    std::pair<halo_type, view_type> pair =
-        gridHalo( local_grid, positions, min_halo_width, max_export_guess );
-
-    using phalo_type =
-        PeriodicHalo<halo_type, view_type, ParticleContainer, PositionIndex>;
-    return phalo_type( pair );
+    return gridHalo( local_grid, positions,
+                     std::integral_constant<std::size_t, PositionIndex>(),
+                     min_halo_width, max_export_guess );
 }
 
 //---------------------------------------------------------------------------//
 /*!
-  \brief gridGather gathers data from one decomposition and ghosts on
-  another decomposition, using the bounds and periodicity of a Cajita grid
-  to determine which particles should be copied. AoSoA variant.
+  \brief Gather data from one decomposition into ghosts on another
+  decomposition, using the bounds and periodicity of a Cajita grid to
+  determine which particles should be copied. AoSoA variant.
 
   \tparam PeriodicHaloType Periodic halo type.
 
@@ -741,14 +791,13 @@ template <class PeriodicHaloType, class ParticleContainer>
 void gridGather( const PeriodicHaloType& periodic_halo,
                  ParticleContainer& particles )
 {
-    auto halo = periodic_halo.getHalo();
-    particles.resize( halo.numLocal() + halo.numGhost() );
+    particles.resize( periodic_halo.numLocal() + periodic_halo.numGhost() );
 
-    gather( halo, particles, periodic_halo );
+    gather( periodic_halo, particles );
 }
 
 // TODO: slice version
 
-} // end namespace Cabana
+} // namespace Cabana
 
 #endif // end CABANA_GRIDCOMM_HPP
diff --git a/core/unit_test/tstParticleGridCommunication.hpp b/core/unit_test/tstParticleGridCommunication.hpp
index e4731f5ee..6b8522bcb 100644
--- a/core/unit_test/tstParticleGridCommunication.hpp
+++ b/core/unit_test/tstParticleGridCommunication.hpp
@@ -313,7 +313,7 @@ void testHalo( const int halo_width, const int test_type )
     if ( test_type == 0 )
     {
         // Do the gather with the AoSoA.
-        auto periodic_halo = createPeriodicHalo(
+        auto periodic_halo = gridHalo(
             *local_grid, data_src,
             std::integral_constant<std::size_t, 1>(), halo_width );
         gridGather( periodic_halo, data_src );
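
Usage sketch 1 (editor's illustration, not part of the patch): the deduplication semantics now centralized in Cabana_CommunicationPlan.hpp. The rank values are illustrative, and the snippet assumes the Impl helpers remain header-visible as in this patch.

    // Sketch: behavior of the relocated Impl::getUniqueTopology().
    #include <Cabana_CommunicationPlan.hpp>

    #include <mpi.h>

    #include <cstdio>
    #include <vector>

    int main( int argc, char** argv )
    {
        MPI_Init( &argc, &argv );

        // A raw topology as produced by Impl::getTopology(): -1 marks an
        // invalid neighbor and ranks may repeat.
        std::vector<int> topology = { 2, -1, 0, 2, 5, -1, 0 };

        // Invalid entries are dropped, duplicates are removed, and this rank
        // (if present) is moved to the front so self-sends come first; on
        // MPI rank 0 this prints "0 2 5".
        auto unique = Cabana::Impl::getUniqueTopology( topology );
        for ( auto n : unique )
            std::printf( "%d ", n );
        std::printf( "\n" );

        MPI_Finalize();
        return 0;
    }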
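Usage sketch 2: the new gather path, mirroring the updated unit test. This assumes the AoSoA stores a double[3] position at member index 1; the member index, the helper name ghostParticles, and the include are illustrative.

    #include <Cabana_ParticleGridCommunication.hpp>

    #include <type_traits>

    template <class LocalGridType, class ParticleContainer>
    void ghostParticles( const LocalGridType& local_grid,
                         ParticleContainer& particles, const int halo_width )
    {
        // The returned PeriodicHalo is itself a Halo, so it can be passed
        // straight to gridGather(); its operator() applies the stored
        // periodic shifts while the send buffer is packed.
        auto periodic_halo =
            Cabana::gridHalo( local_grid, particles,
                              std::integral_constant<std::size_t, 1>(),
                              halo_width );
        Cabana::gridGather( periodic_halo, particles );
    }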
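Usage sketch 3: since createFromExportsAndTopology() now sorts, uniques, and self-orders the neighbor list itself, the topology can be handed to a Distributor (or Halo) either as a std::vector<int> or as a Kokkos View. The helper name makeGridDistributor is hypothetical.

    #include <Cabana_ParticleGridCommunication.hpp>

    template <class DeviceType, class LocalGridType, class ExportRankView>
    auto makeGridDistributor( const LocalGridType& local_grid,
                              const ExportRankView& element_export_ranks )
    {
        // 27-entry neighbor list; -1 entries, duplicates, and this rank are
        // all acceptable, since the CommunicationPlan deduplicates and
        // orders the list internally.
        auto neighbors = Cabana::Impl::getTopology( local_grid );
        return Cabana::Distributor<DeviceType>(
            local_grid.globalGrid().comm(), element_export_ranks, neighbors );
    }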