From 8d8a3476b9a1de92e27d607d9bd74b2aee38cf4e Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Wed, 20 Nov 2024 11:42:00 -0700 Subject: [PATCH] WIP: CArray type --- examples/ann_distributed.cpp | 16 +- examples/ann_distributed_crs.cpp | 46 +-- src/include/Tpetra_LRMultiVector_def.hpp | 7 +- src/include/Tpetra_LR_WrappedDualView.hpp | 99 ++++++ src/include/tpetra_wrapper_types.h | 407 +++++++++++++--------- 5 files changed, 379 insertions(+), 196 deletions(-) diff --git a/examples/ann_distributed.cpp b/examples/ann_distributed.cpp index 83a6a2a4..1f448455 100644 --- a/examples/ann_distributed.cpp +++ b/examples/ann_distributed.cpp @@ -78,8 +78,8 @@ std::vector num_nodes_in_layer = {64000, 30000, 8000, 4000, 2000, 1000, // array of ANN structs struct ANNLayer_t{ //input map will store every global id in the vector for simplificty of row-vector products in this example - TpetraPartitionMap output_partition_map; //map with all comms for row-vector product - TpetraPartitionMap output_unique_map; //submap of uniquely decomposed indices + TpetraPartitionMap<> output_partition_map; //map with all comms for row-vector product + TpetraPartitionMap<> output_unique_map; //submap of uniquely decomposed indices TpetraDFArray distributed_outputs; TpetraDFArray distributed_weights; TpetraDFArray distributed_biases; @@ -247,17 +247,17 @@ int main(int argc, char* argv[]) CArray ANNLayers(num_layers); // starts at 1 and goes to num_layers // input and ouput values to ANN - TpetraPartitionMap input_pmap, input_unique_pmap; + TpetraPartitionMap<> input_pmap, input_unique_pmap; DCArrayKokkos all_layer_indices(num_nodes_in_layer[0]); FOR_ALL(i,0,num_nodes_in_layer[0], { all_layer_indices(i) = i; }); all_layer_indices.update_host(); // copy inputs to device //map of all indices in this layer to be used for row-vector product (in practice, this would not include all indices in the layer) - input_pmap = TpetraPartitionMap(all_layer_indices); + input_pmap = TpetraPartitionMap<>(all_layer_indices); //map that decomposes indices of this onto set of processes uniquely (used to demonstrate comms for above) - input_unique_pmap = TpetraPartitionMap(num_nodes_in_layer[0]); + input_unique_pmap = TpetraPartitionMap<>(num_nodes_in_layer[0]); TpetraDFArray inputs(input_pmap); //rows decomposed onto processes //comming from subview requires both the original map and the submap to be composed of contiguous indices inputs.own_comm_setup(input_unique_pmap); //tells the vector its communicating from a contiguous subset of its own data @@ -275,8 +275,8 @@ int main(int argc, char* argv[]) all_current_layer_indices(i) = i; }); - ANNLayers(layer).output_partition_map = TpetraPartitionMap(all_current_layer_indices); - ANNLayers(layer).output_unique_map = TpetraPartitionMap(num_nodes_in_layer[layer+1]); + ANNLayers(layer).output_partition_map = TpetraPartitionMap<>(all_current_layer_indices); + ANNLayers(layer).output_unique_map = TpetraPartitionMap<>(num_nodes_in_layer[layer+1]); ANNLayers(layer).distributed_outputs = TpetraDFArray (ANNLayers(layer).output_partition_map); //comming from subview requires both the original map and the submap to be composed of contiguous indices ANNLayers(layer).distributed_outputs.own_comm_setup(ANNLayers(layer).output_unique_map); @@ -422,7 +422,7 @@ int main(int argc, char* argv[]) output_grid.print(); //get repartitioned map to distribute new arrays with it - TpetraPartitionMap partitioned_output_map = output_grid.pmap; + TpetraPartitionMap<> partitioned_output_map = output_grid.pmap; TpetraDFArray 
partitioned_output_values(partitioned_output_map, "partitioned output values"); //construct a unique source vector from ANN output using the subview constructor diff --git a/examples/ann_distributed_crs.cpp b/examples/ann_distributed_crs.cpp index 35f4abdf..4284aeb2 100644 --- a/examples/ann_distributed_crs.cpp +++ b/examples/ann_distributed_crs.cpp @@ -78,11 +78,11 @@ std::vector num_nodes_in_layer = {64000, 30000, 8000, 4000, 2000, 1000, // array of ANN structs struct ANNLayer_t{ //input map will store every global id in the vector for simplificty of row-vector products in this example - TpetraPartitionMap output_partition_map; //map with all comms for row-vector product - TpetraPartitionMap output_unique_map; //submap of uniquely decomposed indices - TpetraDFArray distributed_outputs; + TpetraPartitionMap<> output_partition_map; //map with all comms for row-vector product + TpetraPartitionMap<> output_unique_map; //submap of uniquely decomposed indices + TpetraDCArray distributed_outputs; TpetraDCArray distributed_weights; - TpetraDFArray distributed_biases; + TpetraDCArray distributed_biases; }; // end struct @@ -93,8 +93,8 @@ struct ANNLayer_t{ // functions // // ================================================================= -void vec_mat_multiply(TpetraDFArray &inputs, - TpetraDFArray &outputs, +void vec_mat_multiply(TpetraDCArray &inputs, + TpetraDCArray &outputs, TpetraDCArray &matrix){ const size_t num_i = inputs.size(); @@ -144,10 +144,10 @@ float sigmoid_derivative(const float value){ -void forward_propagate_layer(TpetraDFArray &inputs, - TpetraDFArray &outputs, +void forward_propagate_layer(TpetraDCArray &inputs, + TpetraDCArray &outputs, TpetraDCArray &weights, - const TpetraDFArray &biases){ + const TpetraDCArray &biases){ const size_t num_i = inputs.size(); const size_t num_j = outputs.submap_size(); @@ -199,7 +199,7 @@ void forward_propagate_layer(TpetraDFArray &inputs, }; // end function -void set_biases(const TpetraDFArray &biases){ +void set_biases(const TpetraDCArray &biases){ const size_t num_j = biases.size(); FOR_ALL(j,0,num_j, { @@ -246,18 +246,18 @@ int main(int argc, char* argv[]) CArray ANNLayers(num_layers); // starts at 1 and goes to num_layers // input and ouput values to ANN - TpetraPartitionMap input_pmap, input_unique_pmap; + TpetraPartitionMap<> input_pmap, input_unique_pmap; DCArrayKokkos all_layer_indices(num_nodes_in_layer[0]); FOR_ALL(i,0,num_nodes_in_layer[0], { all_layer_indices(i) = i; }); all_layer_indices.update_host(); // copy inputs to device //map of all indices in this layer to be used for row-vector product (in practice, this would not include all indices in the layer) - input_pmap = TpetraPartitionMap(all_layer_indices); + input_pmap = TpetraPartitionMap<>(all_layer_indices); //map that decomposes indices of this onto set of processes uniquely (used to demonstrate comms for above) - input_unique_pmap = TpetraPartitionMap(num_nodes_in_layer[0]); - TpetraDFArray inputs(input_pmap); //rows decomposed onto processes + input_unique_pmap = TpetraPartitionMap<>(num_nodes_in_layer[0]); + TpetraDCArray inputs(input_pmap); //rows decomposed onto processes //comming from subview requires both the original map and the submap to be composed of contiguous indices inputs.own_comm_setup(input_unique_pmap); //tells the vector its communicating from a contiguous subset of its own data @@ -274,14 +274,14 @@ int main(int argc, char* argv[]) all_current_layer_indices(i) = i; }); - ANNLayers(layer).output_partition_map = 
TpetraPartitionMap(all_current_layer_indices); - ANNLayers(layer).output_unique_map = TpetraPartitionMap(num_nodes_in_layer[layer+1]); - ANNLayers(layer).distributed_outputs = TpetraDFArray (ANNLayers(layer).output_partition_map); + ANNLayers(layer).output_partition_map = TpetraPartitionMap<>(all_current_layer_indices); + ANNLayers(layer).output_unique_map = TpetraPartitionMap<>(num_nodes_in_layer[layer+1]); + ANNLayers(layer).distributed_outputs = TpetraDCArray (ANNLayers(layer).output_partition_map); //comming from subview requires both the original map and the submap to be composed of contiguous indices ANNLayers(layer).distributed_outputs.own_comm_setup(ANNLayers(layer).output_unique_map); // allocate the weights in this layer ANNLayers(layer).distributed_weights = TpetraDCArray (num_j, num_i); - ANNLayers(layer).distributed_biases = TpetraDFArray (num_j); + ANNLayers(layer).distributed_biases = TpetraDCArray (num_j); } // end for @@ -392,7 +392,7 @@ int main(int argc, char* argv[]) //test repartition; assume a 10 by 10 grid of outputs from ANN //assign coords to each grid point, find a partition of the grid, then repartition output layer using new map - TpetraDFArray output_grid(100, 2); //array of 2D coordinates for 10 by 10 grid of points + TpetraDCArray output_grid(100, 2); //array of 2D coordinates for 10 by 10 grid of points //populate coords FOR_ALL(i,0,output_grid.dims(0), { @@ -423,16 +423,16 @@ int main(int argc, char* argv[]) output_grid.print(); //get repartitioned map to distribute new arrays with it - TpetraPartitionMap partitioned_output_map = output_grid.pmap; - TpetraDFArray partitioned_output_values(partitioned_output_map, "partitioned output values"); + TpetraPartitionMap<> partitioned_output_map = output_grid.pmap; + TpetraDCArray partitioned_output_values(partitioned_output_map, "partitioned output values"); //construct a unique source vector from ANN output using the subview constructor //(for example's sake this is in fact a copy of the subview wrapped by the output as well) - TpetraDFArray sub_output_values(ANNLayers(num_layers-1).distributed_outputs, ANNLayers(num_layers-1).distributed_outputs.comm_pmap, + TpetraDCArray sub_output_values(ANNLayers(num_layers-1).distributed_outputs, ANNLayers(num_layers-1).distributed_outputs.comm_pmap, ANNLayers(num_layers-1).distributed_outputs.comm_pmap.getMinGlobalIndex()); //general communication object between two vectors/arrays - TpetraCommunicationPlan output_comms(partitioned_output_values, sub_output_values); + TpetraLRCommunicationPlan output_comms(partitioned_output_values, sub_output_values); output_comms.execute_comms(); partitioned_output_values.print(); diff --git a/src/include/Tpetra_LRMultiVector_def.hpp b/src/include/Tpetra_LRMultiVector_def.hpp index 224c39a7..c2870cb3 100644 --- a/src/include/Tpetra_LRMultiVector_def.hpp +++ b/src/include/Tpetra_LRMultiVector_def.hpp @@ -304,12 +304,7 @@ namespace { // (anonymous) const size_t LDA = view.stride (1); const size_t numRows = view.extent (0); - if (LDA == 0) { - return (numRows == 0) ? 
size_t (1) : numRows; - } - else { - return LDA; - } + return numRows; } template diff --git a/src/include/Tpetra_LR_WrappedDualView.hpp b/src/include/Tpetra_LR_WrappedDualView.hpp index 3fc64ae9..245615a2 100644 --- a/src/include/Tpetra_LR_WrappedDualView.hpp +++ b/src/include/Tpetra_LR_WrappedDualView.hpp @@ -15,8 +15,15 @@ #include #include "Teuchos_TestForException.hpp" #include "Tpetra_Details_ExecutionSpaces.hpp" +#include "Tpetra_Details_gathervPrint.hpp" #include +// #include "Tpetra_Details_WrappedDualView.hpp" +// #include "Kokkos_DualView.hpp" +// #include "Teuchos_TypeNameTraits.hpp" +// #include "Teuchos_Comm.hpp" +// #include "Teuchos_CommHelpers.hpp" + //#define DEBUG_UVM_REMOVAL // Works only with gcc > 4.8 #ifdef DEBUG_UVM_REMOVAL @@ -625,6 +632,98 @@ class LRWrappedDualView { mutable DualViewType dualView; }; +/// \brief Is the given Tpetra::WrappedDualView valid? +/// +/// A WrappedDualView is valid if both of its constituent Views are valid. +template +bool +checkLocalWrappedDualViewValidity + (std::ostream* const lclErrStrm, + const int myMpiProcessRank, + const Tpetra::Details::LRWrappedDualView >& dv) +{ + const bool dev_good = dv.is_valid_device(); + const bool host_good = dv. is_valid_host(); + const bool good = dev_good && host_good; + if (! good && lclErrStrm != nullptr) { + using Teuchos::TypeNameTraits; + using std::endl; + using dv_type = + Tpetra::Details::WrappedDualView >; + + const std::string dvName = TypeNameTraits::name (); + *lclErrStrm << "Proc " << myMpiProcessRank << ": Tpetra::WrappedDualView " + "of type " << dvName << " has one or more invalid Views. See " + "above error messages from this MPI process for details." << endl; + } + return good; +} + +template +bool +checkGlobalWrappedDualViewValidity +(std::ostream* const gblErrStrm, + const Tpetra::Details::LRWrappedDualView >& dv, + const bool verbose, + const Teuchos::Comm* const comm) +{ + using std::endl; + const int myRank = comm == nullptr ? 0 : comm->getRank (); + std::ostringstream lclErrStrm; + int lclSuccess = 1; + + try { + const bool lclValid = + checkLocalWrappedDualViewValidity (&lclErrStrm, myRank, dv); + lclSuccess = lclValid ? 1 : 0; + } + catch (std::exception& e) { + lclErrStrm << "Proc " << myRank << ": checkLocalDualViewValidity " + "threw an exception: " << e.what () << endl; + lclSuccess = 0; + } + catch (...) { + lclErrStrm << "Proc " << myRank << ": checkLocalDualViewValidity " + "threw an exception not a subclass of std::exception." << endl; + lclSuccess = 0; + } + + int gblSuccess = 0; // output argument + if (comm == nullptr) { + gblSuccess = lclSuccess; + } + else { + using Teuchos::outArg; + using Teuchos::REDUCE_MIN; + using Teuchos::reduceAll; + reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess)); + } + + if (gblSuccess != 1 && gblErrStrm != nullptr) { + *gblErrStrm << "On at least one (MPI) process, the " + "Kokkos::DualView has " + "either the device or host pointer in the " + "DualView equal to null, but the DualView has a nonzero number of " + "rows. For more detailed information, please rerun with the " + "TPETRA_VERBOSE environment variable set to 1. 
"; + if (verbose) { + *gblErrStrm << " Here are error messages from all " + "processes:" << endl; + if (comm == nullptr) { + *gblErrStrm << lclErrStrm.str (); + } + else { + using Tpetra::Details::gathervPrint; + gathervPrint (*gblErrStrm, lclErrStrm.str (), *comm); + } + } + *gblErrStrm << endl; + } + return gblSuccess == 1; +} + } // namespace Details } // namespace Tpetra diff --git a/src/include/tpetra_wrapper_types.h b/src/include/tpetra_wrapper_types.h index 6d93cf58..f18b81bd 100644 --- a/src/include/tpetra_wrapper_types.h +++ b/src/include/tpetra_wrapper_types.h @@ -88,12 +88,12 @@ namespace mtr ///////////////////////// // TpetraPartitionMap: Container storing global indices corresponding to local indices that belong on this process/rank as well as comms related data/functions. ///////////////////////// -template +template class TpetraPartitionMap { // these are unmanaged - using TArray1D_host = Kokkos::View ; - using TArray1D_dev = Kokkos::View ; + using TArray1D_host = Kokkos::View ; + using TArray1D_dev = Kokkos::View ; protected: @@ -102,7 +102,6 @@ class TpetraPartitionMap { TArray1D_host host; TArray1D_dev device; - void set_mpi_type(); public: @@ -117,18 +116,18 @@ class TpetraPartitionMap { TpetraPartitionMap(); //Copy Constructor - TpetraPartitionMap(const TpetraPartitionMap &temp){ + TpetraPartitionMap(const TpetraPartitionMap &temp){ *this = temp; } TpetraPartitionMap(size_t global_length, MPI_Comm mpi_comm = MPI_COMM_WORLD, const std::string& tag_string = DEFAULTSTRINGARRAY); - TpetraPartitionMap(DCArrayKokkos &indices, MPI_Comm mpi_comm = MPI_COMM_WORLD, const std::string& tag_string = DEFAULTSTRINGARRAY); + TpetraPartitionMap(DCArrayKokkos &indices, MPI_Comm mpi_comm = MPI_COMM_WORLD, const std::string& tag_string = DEFAULTSTRINGARRAY); TpetraPartitionMap(Teuchos::RCP> input_tpetra_map, const std::string& tag_string = DEFAULTSTRINGARRAY); KOKKOS_INLINE_FUNCTION - T& operator()(size_t i) const; + long long int& operator()(size_t i) const; KOKKOS_INLINE_FUNCTION TpetraPartitionMap& operator=(const TpetraPartitionMap& temp); @@ -163,10 +162,10 @@ class TpetraPartitionMap { // Method returns the raw device pointer of the Kokkos DualView KOKKOS_INLINE_FUNCTION - T* device_pointer() const; + long long int* device_pointer() const; // Method returns the raw host pointer of the Kokkos DualView - T* host_pointer() const; + long long int* host_pointer() const; void print() const; @@ -177,14 +176,14 @@ class TpetraPartitionMap { // Default constructor -template -TpetraPartitionMap::TpetraPartitionMap() { +template +TpetraPartitionMap::TpetraPartitionMap() { length_ = 0; } //Constructor for contiguous index decomposition -template -TpetraPartitionMap::TpetraPartitionMap(size_t global_length, MPI_Comm mpi_comm, const std::string& tag_string) { +template +TpetraPartitionMap::TpetraPartitionMap(size_t global_length, MPI_Comm mpi_comm, const std::string& tag_string) { mpi_comm_ = mpi_comm; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm)); tpetra_map = Teuchos::rcp(new Tpetra::Map((long long int) global_length, 0, teuchos_comm)); @@ -192,12 +191,12 @@ TpetraPartitionMap::TpetraPartitionMap(size_t g TArray1D_host host = tpetra_map->getMyGlobalIndices(); TArray1D_dev device = tpetra_map->getMyGlobalIndicesDevice(); length_ = host.size(); - set_mpi_type(); + mpi_datatype_ = MPI_LONG_LONG_INT; } // Constructor to pass matar dual view of indices -template -TpetraPartitionMap::TpetraPartitionMap(DCArrayKokkos &indices, MPI_Comm mpi_comm, const std::string& 
tag_string) { +template +TpetraPartitionMap::TpetraPartitionMap(DCArrayKokkos &indices, MPI_Comm mpi_comm, const std::string& tag_string) { mpi_comm_ = mpi_comm; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm)); tpetra_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), indices.get_kokkos_dual_view().d_view, 0, teuchos_comm)); @@ -205,12 +204,12 @@ TpetraPartitionMap::TpetraPartitionMap(DCArrayK TArray1D_dev device = tpetra_map->getMyGlobalIndicesDevice(); length_ = host.size(); num_global_ = tpetra_map->getGlobalNumElements(); - set_mpi_type(); + mpi_datatype_ = MPI_LONG_LONG_INT; } // Constructor to pass an existing Tpetra map -template -TpetraPartitionMap::TpetraPartitionMap(Teuchos::RCP> input_tpetra_map, const std::string& tag_string) { +template +TpetraPartitionMap::TpetraPartitionMap(Teuchos::RCP> input_tpetra_map, const std::string& tag_string) { tpetra_map = input_tpetra_map; Teuchos::RCP> teuchos_comm = tpetra_map->getComm(); mpi_comm_ = getRawMpiComm(*teuchos_comm); @@ -218,46 +217,20 @@ TpetraPartitionMap::TpetraPartitionMap(Teuchos: TArray1D_dev device = input_tpetra_map->getMyGlobalIndicesDevice(); length_ = host.size(); num_global_ = tpetra_map->getGlobalNumElements(); - set_mpi_type(); + mpi_datatype_ = MPI_LONG_LONG_INT; } -template -void TpetraPartitionMap::set_mpi_type() { - if (typeid(T).name() == typeid(bool).name()) { - mpi_datatype_ = MPI_C_BOOL; - } - else if (typeid(T).name() == typeid(int).name()) { - mpi_datatype_ = MPI_INT; - } - else if (typeid(T).name() == typeid(long int).name()) { - mpi_datatype_ = MPI_LONG; - } - else if (typeid(T).name() == typeid(long long int).name()) { - mpi_datatype_ = MPI_LONG_LONG_INT; - } - else if (typeid(T).name() == typeid(float).name()) { - mpi_datatype_ = MPI_FLOAT; - } - else if (typeid(T).name() == typeid(double).name()) { - mpi_datatype_ = MPI_DOUBLE; - } - else { - printf("Your entered TpetraPartitionMap type is not a supported type for MPI communications and is being set to int\n"); - mpi_datatype_ = MPI_INT; - } -} - -template +template KOKKOS_INLINE_FUNCTION -T& TpetraPartitionMap::operator()(size_t i) const { +long long int& TpetraPartitionMap::operator()(size_t i) const { assert(order_ == 1 && "Tensor order (rank) does not match constructor in TpetraPartitionMap 1D!"); assert(i >= 0 && i < dims_[0] && "i is out of bounds in TpetraPartitionMap 1D!"); return device(i); } -template +template KOKKOS_INLINE_FUNCTION -TpetraPartitionMap& TpetraPartitionMap::operator= (const TpetraPartitionMap& temp) { +TpetraPartitionMap& TpetraPartitionMap::operator= (const TpetraPartitionMap& temp) { // Do nothing if the assignment is of the form x = x if (this != &temp) { @@ -274,31 +247,31 @@ TpetraPartitionMap& TpetraPartitionMap +template KOKKOS_INLINE_FUNCTION -size_t TpetraPartitionMap::size() const { +size_t TpetraPartitionMap::size() const { return length_; } -template +template KOKKOS_INLINE_FUNCTION -size_t TpetraPartitionMap::extent() const { +size_t TpetraPartitionMap::extent() const { return length_; } -template +template KOKKOS_INLINE_FUNCTION -T* TpetraPartitionMap::device_pointer() const { +long long int* TpetraPartitionMap::device_pointer() const { return device.data(); } -template -T* TpetraPartitionMap::host_pointer() const { +template +long long int* TpetraPartitionMap::host_pointer() const { return host.data(); } -template -void TpetraPartitionMap::print() const { +template +void TpetraPartitionMap::print() const { std::ostream &out = std::cout; Teuchos::RCP fos; fos = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); @@ -306,56 +279,56 @@ void TpetraPartitionMap::print() const { } // Return local index (on this process/rank) corresponding to the input global index -template +template KOKKOS_INLINE_FUNCTION -int TpetraPartitionMap::getLocalIndex(int global_index) const { +int TpetraPartitionMap::getLocalIndex(int global_index) const { int local_index = tpetra_map->getLocalElement(global_index); return local_index; } // Return global index corresponding to the input local (on this process/rank) index -template +template KOKKOS_INLINE_FUNCTION -long long int TpetraPartitionMap::getGlobalIndex(int local_index) const { +long long int TpetraPartitionMap::getGlobalIndex(int local_index) const { int global_index = tpetra_map->getGlobalElement(local_index); return global_index; } // Return smallest global index (on this process/rank) -template +template KOKKOS_INLINE_FUNCTION -long long int TpetraPartitionMap::getMinGlobalIndex() const { +long long int TpetraPartitionMap::getMinGlobalIndex() const { int global_index = tpetra_map->getMinGlobalIndex(); return global_index; } // Return largest global index (on this process/rank) -template +template KOKKOS_INLINE_FUNCTION -long long int TpetraPartitionMap::getMaxGlobalIndex() const { +long long int TpetraPartitionMap::getMaxGlobalIndex() const { int global_index = tpetra_map->getMaxGlobalIndex(); return global_index; } // Return global index corresponding to the input local (on this process/rank) index -template +template KOKKOS_INLINE_FUNCTION -bool TpetraPartitionMap::isProcessGlobalIndex(int global_index) const { +bool TpetraPartitionMap::isProcessGlobalIndex(int global_index) const { bool belongs = tpetra_map->isNodeGlobalElement(global_index); return belongs; } // Return global index corresponding to the input local (on this process/rank) index -template +template KOKKOS_INLINE_FUNCTION -bool TpetraPartitionMap::isProcessLocalIndex(int local_index) const { +bool TpetraPartitionMap::isProcessLocalIndex(int local_index) const { bool belongs = tpetra_map->isNodeGlobalElement(local_index); return belongs; } -template +template KOKKOS_INLINE_FUNCTION -TpetraPartitionMap::~TpetraPartitionMap() {} +TpetraPartitionMap::~TpetraPartitionMap() {} //////////////////////////////////////////////////////////////////////////////// // End of TpetraPartitionMap @@ -403,8 +376,8 @@ class TpetraDCArray { bool own_comms; //This Mapped MPI Array contains its own communication plan; just call array_comms() void set_mpi_type(); - TpetraPartitionMap pmap; - TpetraPartitionMap comm_pmap; + TpetraPartitionMap pmap; + TpetraPartitionMap comm_pmap; Teuchos::RCP> tpetra_pmap; Teuchos::RCP> tpetra_comm_pmap; Teuchos::RCP tpetra_vector; @@ -445,31 +418,31 @@ class TpetraDCArray { /* Specified Map Constructors*/ //Tpetra type for 1D case with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, + TpetraDCArray(TpetraPartitionMap &input_pmap, const std::string& tag_string = DEFAULTSTRINGARRAY); //2D Tpetra type with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); //3D Tpetra type with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, const std::string& tag_string = DEFAULTSTRINGARRAY); //4D Tpetra type with a partition map passed in - 
TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, const std::string& tag_string = DEFAULTSTRINGARRAY); //5D Tpetra type with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, const std::string& tag_string = DEFAULTSTRINGARRAY); //6D Tpetra type with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, const std::string& tag_string = DEFAULTSTRINGARRAY); //7D Tpetra type with a partition map passed in - TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, size_t dim6, const std::string& tag_string = DEFAULTSTRINGARRAY); //Tpetra type for 1D case(still allocates dim0 by 1 using **T); this constructor takes an RCP pointer to a Tpetra Map directly @@ -502,7 +475,7 @@ class TpetraDCArray { //construct an array that views a contiguous subset of another array; start index denotes the local index in super vector to start the sub view TpetraDCArray(const TpetraDCArray &super_vector, - const TpetraPartitionMap &sub_pmap, size_t start_index); + const TpetraPartitionMap &sub_pmap, size_t start_index); // 1D array setup void data_setup(const std::string& tag_string); @@ -533,7 +506,7 @@ class TpetraDCArray { void own_comm_setup(Teuchos::RCP> other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map - void own_comm_setup(TpetraPartitionMap &other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map + void own_comm_setup(TpetraPartitionMap &other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map void perform_comms(); @@ -661,7 +634,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, const global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(tag_string); } @@ -673,7 +646,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, tag_string); } @@ -685,7 +658,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, tag_string); } @@ -697,7 +670,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = 
TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, tag_string); } @@ -710,7 +683,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, tag_string); } @@ -723,7 +696,7 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, tag_string); } @@ -736,14 +709,14 @@ TpetraDCArray::TpetraDCArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, dim6, tag_string); } // Overloaded 1D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -755,7 +728,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 2D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -767,7 +740,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 3D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -779,7 +752,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 4D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -791,7 +764,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 5D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -803,7 +776,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 6D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, 
size_t dim2, size_t dim3, size_t dim4, size_t dim5, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; @@ -816,7 +789,7 @@ TpetraDCArray::TpetraDCArray(TpetraPartitionMap // Overloaded 7D constructor where you provide a partition map template -TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, +TpetraDCArray::TpetraDCArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, size_t dim6, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; @@ -834,7 +807,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(tag_string); } @@ -846,7 +819,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, tag_string); } @@ -858,7 +831,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, tag_string); } @@ -870,7 +843,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, tag_string); } @@ -882,7 +855,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, tag_string); } @@ -895,7 +868,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, tag_string); } @@ -908,7 +881,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, dim6, tag_string); } @@ -916,7 +889,7 @@ TpetraDCArray::TpetraDCArray(Teuchos::RCP TpetraDCArray::TpetraDCArray(const TpetraDCArray &super_vector, - const TpetraPartitionMap &sub_pmap, size_t start_index){ + const TpetraPartitionMap &sub_pmap, size_t start_index){ mpi_comm_ = sub_pmap.mpi_comm_; global_dim1_ = sub_pmap.num_global_; tpetra_pmap = sub_pmap.tpetra_map; @@ -1403,10 +1376,10 @@ void TpetraDCArray::update_device() { } template -void TpetraDCArray::own_comm_setup(TpetraPartitionMap &other_pmap) { +void TpetraDCArray::own_comm_setup(TpetraPartitionMap &other_pmap) { own_comms = true; tpetra_comm_pmap = other_pmap.tpetra_map; - comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); + comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); int local_offset = tpetra_pmap->getLocalElement((tpetra_comm_pmap->getMinGlobalIndex())); tpetra_sub_vector = Teuchos::rcp(new MV(*tpetra_vector, tpetra_comm_pmap, local_offset)); submap_size_ = tpetra_comm_pmap->getLocalNumElements(); @@ -1418,7 +1391,7 @@ template ::own_comm_setup(Teuchos::RCP> other_pmap) { own_comms = true; tpetra_comm_pmap = other_pmap; - comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); + comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); int local_offset = 
tpetra_pmap->getLocalElement((tpetra_comm_pmap->getMinGlobalIndex())); tpetra_sub_vector = Teuchos::rcp(new MV(*tpetra_vector, tpetra_comm_pmap, local_offset)); submap_size_ = tpetra_comm_pmap->getLocalNumElements(); @@ -1530,7 +1503,7 @@ void TpetraDCArray::repartition_vector() { // partitioned_node_coords_one_to_one_distributed->doImport(*partitioned_node_coords_distributed, importer_one_to_one, Tpetra::INSERT); // node_coords_distributed = partitioned_node_coords_one_to_one_distributed; tpetra_pmap = Teuchos::rcp(new Tpetra::Map(*partitioned_map_one_to_one)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; //reset submap setup now that full map is different dims_[0] = tpetra_pmap->getLocalNumElements(); length_ = (dims_[0] * component_length_); @@ -1620,8 +1593,8 @@ class TpetraDFArray { bool own_comms; //This Mapped MPI Array contains its own communication plan; just call array_comms() void set_mpi_type(); - TpetraPartitionMap pmap; - TpetraPartitionMap comm_pmap; + TpetraPartitionMap pmap; + TpetraPartitionMap comm_pmap; Teuchos::RCP> tpetra_pmap; Teuchos::RCP> tpetra_comm_pmap; Teuchos::RCP tpetra_vector; @@ -1662,31 +1635,31 @@ class TpetraDFArray { /* Specified Map Constructors*/ //Tpetra type for 1D case with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, + TpetraDFArray(TpetraPartitionMap &input_pmap, const std::string& tag_string = DEFAULTSTRINGARRAY); //2D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); //3D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, const std::string& tag_string = DEFAULTSTRINGARRAY); //4D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, const std::string& tag_string = DEFAULTSTRINGARRAY); //5D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, const std::string& tag_string = DEFAULTSTRINGARRAY); //6D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, const std::string& tag_string = DEFAULTSTRINGARRAY); //7D Tpetra type with a partition map passed in - TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, + TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, size_t dim6, const std::string& tag_string = DEFAULTSTRINGARRAY); //Tpetra type for 1D case(still allocates dim0 by 1 using **T); this constructor takes an RCP pointer to a Tpetra Map directly @@ -1719,7 +1692,7 @@ class TpetraDFArray { //construct an array that views a contiguous subset of another array; start index denotes the local index in super vector to start the sub view TpetraDFArray(const TpetraDFArray &super_vector, - const TpetraPartitionMap &sub_pmap, size_t start_index); + const TpetraPartitionMap 
&sub_pmap, size_t start_index); // 1D array setup void data_setup(const std::string& tag_string); @@ -1750,7 +1723,7 @@ class TpetraDFArray { void own_comm_setup(Teuchos::RCP> other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map - void own_comm_setup(TpetraPartitionMap &other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map + void own_comm_setup(TpetraPartitionMap &other_pmap); //only call if the map in the arg is a uniquely owned submap of the arrays map void perform_comms(); @@ -1878,7 +1851,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, const global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(tag_string); } @@ -1890,7 +1863,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, tag_string); } @@ -1902,7 +1875,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, tag_string); } @@ -1914,7 +1887,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, tag_string); } @@ -1927,7 +1900,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, tag_string); } @@ -1940,7 +1913,7 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, tag_string); } @@ -1953,14 +1926,14 @@ TpetraDFArray::TpetraDFArray(size_t dim0, size_ global_dim1_ = dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, dim6, tag_string); } // Overloaded 1D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, const 
std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -1972,7 +1945,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 2D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -1984,7 +1957,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 3D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -1996,7 +1969,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 4D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -2008,7 +1981,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 5D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; global_dim1_ = input_pmap.num_global_; @@ -2020,7 +1993,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 6D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; @@ -2033,7 +2006,7 @@ TpetraDFArray::TpetraDFArray(TpetraPartitionMap // Overloaded 7D constructor where you provide a partition map template -TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, +TpetraDFArray::TpetraDFArray(TpetraPartitionMap &input_pmap, size_t dim1, size_t dim2, size_t dim3, size_t dim4, size_t dim5, size_t dim6, const std::string& tag_string) { mpi_comm_ = input_pmap.mpi_comm_; @@ -2051,7 +2024,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(tag_string); } @@ -2063,7 +2036,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, tag_string); } @@ -2075,7 +2048,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, tag_string); } @@ -2087,7 +2060,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, tag_string); } @@ -2099,7 +2072,7 @@ 
TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, tag_string); } @@ -2112,7 +2085,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, tag_string); } @@ -2125,7 +2098,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCPgetGlobalNumElements(); tpetra_pmap = input_pmap; - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; data_setup(dim1, dim2, dim3, dim4, dim5, dim6, tag_string); } @@ -2133,7 +2106,7 @@ TpetraDFArray::TpetraDFArray(Teuchos::RCP TpetraDFArray::TpetraDFArray(const TpetraDFArray &super_vector, - const TpetraPartitionMap &sub_pmap, size_t start_index){ + const TpetraPartitionMap &sub_pmap, size_t start_index){ mpi_comm_ = sub_pmap.mpi_comm_; global_dim1_ = sub_pmap.num_global_; tpetra_pmap = sub_pmap.tpetra_map; @@ -2630,10 +2603,10 @@ void TpetraDFArray::update_device() { } template -void TpetraDFArray::own_comm_setup(TpetraPartitionMap &other_pmap) { +void TpetraDFArray::own_comm_setup(TpetraPartitionMap &other_pmap) { own_comms = true; tpetra_comm_pmap = other_pmap.tpetra_map; - comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); + comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); int local_offset = tpetra_pmap->getLocalElement((tpetra_comm_pmap->getMinGlobalIndex())); tpetra_sub_vector = Teuchos::rcp(new MV(*tpetra_vector, tpetra_comm_pmap, local_offset)); submap_size_ = tpetra_comm_pmap->getLocalNumElements(); @@ -2645,7 +2618,7 @@ template ::own_comm_setup(Teuchos::RCP> other_pmap) { own_comms = true; tpetra_comm_pmap = other_pmap; - comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); + comm_pmap = TpetraPartitionMap(tpetra_comm_pmap); int local_offset = tpetra_pmap->getLocalElement((tpetra_comm_pmap->getMinGlobalIndex())); tpetra_sub_vector = Teuchos::rcp(new MV(*tpetra_vector, tpetra_comm_pmap, local_offset)); submap_size_ = tpetra_comm_pmap->getLocalNumElements(); @@ -2757,7 +2730,7 @@ void TpetraDFArray::repartition_vector() { // partitioned_node_coords_one_to_one_distributed->doImport(*partitioned_node_coords_distributed, importer_one_to_one, Tpetra::INSERT); // node_coords_distributed = partitioned_node_coords_one_to_one_distributed; tpetra_pmap = Teuchos::rcp(new Tpetra::Map(*partitioned_map_one_to_one)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); own_comms = false; //reset submap setup now that full map is different dims_[0] = tpetra_pmap->getLocalNumElements(); length_ = (dims_[0] * component_length_); @@ -2855,9 +2828,9 @@ class TpetraCArray { bool own_comms; //This Mapped MPI Array contains its own communication plan; just call array_comms() void set_mpi_type(); - TpetraPartitionMap pmap; - TpetraPartitionMap column_pmap; - TpetraPartitionMap comm_pmap; + TpetraPartitionMap pmap; + TpetraPartitionMap column_pmap; + TpetraPartitionMap comm_pmap; Teuchos::RCP> tpetra_pmap; Teuchos::RCP> tpetra_column_pmap; Teuchos::RCP> tpetra_comm_pmap; @@ -2878,7 +2851,7 @@ class TpetraCArray { const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); //CRS matrix constructor with arbitrary row graph and column map supplied - TpetraCArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& 
tag_string = DEFAULTSTRINGARRAY); + TpetraCArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); //CRS matric constructor with arbitrary row graph; builds column map for you and thus one less arg TpetraCArray(Teuchos::RCP> input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); @@ -2963,7 +2936,7 @@ TpetraCArray::TpetraCArray(size_t global_dim0, global_dim0_ = global_dim0; Teuchos::RCP> teuchos_comm = Teuchos::rcp(new Teuchos::MpiComm(mpi_comm_)); tpetra_pmap = Teuchos::rcp(new Tpetra::Map((long long int) global_dim0, 0, teuchos_comm)); - pmap = TpetraPartitionMap(tpetra_pmap); + pmap = TpetraPartitionMap(tpetra_pmap); dims_[0] = tpetra_pmap->getLocalNumElements(); dims_[1] = dim1; order_ = 2; @@ -3028,7 +3001,7 @@ TpetraCArray::TpetraCArray(size_t global_dim0, // Overloaded 2D constructor where you provide a partition map template -TpetraCArray::TpetraCArray(TpetraPartitionMap &input_pmap, +TpetraCArray::TpetraCArray(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string) { } @@ -3240,9 +3213,9 @@ class TpetraCRSMatrix { bool own_comms; //This Mapped MPI Array contains its own communication plan; just call array_comms() void set_mpi_type(); - TpetraPartitionMap pmap; - TpetraPartitionMap column_pmap; - TpetraPartitionMap comm_pmap; + TpetraPartitionMap pmap; + TpetraPartitionMap column_pmap; + TpetraPartitionMap comm_pmap; Teuchos::RCP> tpetra_pmap; Teuchos::RCP> tpetra_column_pmap; Teuchos::RCP> tpetra_comm_pmap; @@ -3266,7 +3239,7 @@ class TpetraCRSMatrix { // const std::string& tag_string = DEFAULTSTRINGARRAY, MPI_Comm mpi_comm = MPI_COMM_WORLD); //CRS matrix constructor with arbitrary row graph and column map supplied - TpetraCRSMatrix(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); + TpetraCRSMatrix(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); //CRS matric constructor with arbitrary row graph; builds column map for you and thus one less arg TpetraCRSMatrix(Teuchos::RCP> input_pmap, size_t dim1, const std::string& tag_string = DEFAULTSTRINGARRAY); @@ -3447,7 +3420,7 @@ TpetraCRSMatrix::TpetraCRSMatrix(): tpetra_pmap // Overloaded 2D constructor where you provide a partition map template -TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap &input_pmap, +TpetraCRSMatrix::TpetraCRSMatrix(TpetraPartitionMap &input_pmap, size_t dim1, const std::string& tag_string) { // mpi_comm_ = input_pmap.mpi_comm_; // global_dim1_ = input_pmap.num_global_; @@ -3770,6 +3743,122 @@ TpetraCommunicationPlan::~TpetraCommunicationPl // End of TpetraCommunicationPlan //////////////////////////////////////////////////////////////////////////////// +///////////////////////// +/* TpetraTpetraLRCommunicationPlan: Class storing relevant data and functions to perform comms between two different Tpetra MATAR MPI types. + The object for this class should not be reconstructed if the same comm plan is needed repeatedly; the setup is expensive. 
+ The comms routines such as execute_comms can be called repeatedly to avoid repeated setup of the plan.*/ +///////////////////////// +template +class TpetraLRCommunicationPlan { + + // this is manage + using TArray1D = Kokkos::DualView ; + +protected: + TpetraDCArray destination_vector_; + TpetraDCArray source_vector_; + + /*forward comms means communicating data to a vector that doesn't have a unique distribution of its global + indices amongst processes from a vector that does have a unique distribution amongst processes. + An example of forward comms in a finite element application would be communicating ghost data from + the vector of local data. + + reverse comms means communicating data to a vector that has a unique distribution of its global + indices amongst processes from a vector that does not have a unique distribution amongst processes. + An example of reverse comms in a finite element application would be communicating force contributions from ghost + indices via summation to the entries of the uniquely owned vector that stores final tallies of forces. + */ + bool reverse_comms_flag; //default is false + Teuchos::RCP> importer; // tpetra comm object + Teuchos::RCP> exporter; // tpetra reverse comm object + +public: + + enum combine_mode { INSERT, SUM, ABSMAX, REPLACE, MIN, ADD_REPLACE }; + combine_mode combine_mode_; + + TpetraLRCommunicationPlan(); + + //Copy Constructor + TpetraLRCommunicationPlan(const TpetraLRCommunicationPlan &temp){ + *this = temp; + } + + TpetraLRCommunicationPlan(TpetraDCArray destination_vector, + TpetraDCArray source_vector, bool reverse_comms=false, combine_mode mode=INSERT); + + KOKKOS_INLINE_FUNCTION + TpetraLRCommunicationPlan& operator=(const TpetraLRCommunicationPlan& temp); + + // Deconstructor + virtual KOKKOS_INLINE_FUNCTION + ~TpetraLRCommunicationPlan (); + + void execute_comms(); +}; // End of TpetraLRCommunicationPlan + + +// Default constructor +template +TpetraLRCommunicationPlan::TpetraLRCommunicationPlan() { + +} + +// Overloaded 1D constructor +template +TpetraLRCommunicationPlan::TpetraLRCommunicationPlan(TpetraDCArray destination_vector, + TpetraDCArray source_vector, bool reverse_comms, combine_mode mode) { + combine_mode_ = mode; + reverse_comms_flag = reverse_comms; + destination_vector_ = destination_vector; + source_vector_ = source_vector; + + //setup Tpetra comm object + if(reverse_comms){ + // create export object; completes setup + exporter = Teuchos::rcp(new Tpetra::Export(source_vector_.tpetra_pmap, destination_vector_.tpetra_pmap)); + } + else{ + // create import object; completes setup + importer = Teuchos::rcp(new Tpetra::Import(source_vector_.tpetra_pmap, destination_vector_.tpetra_pmap)); + } +} + + +template +KOKKOS_INLINE_FUNCTION +TpetraLRCommunicationPlan& TpetraLRCommunicationPlan::operator= (const TpetraLRCommunicationPlan& temp) { + + // Do nothing if the assignment is of the form x = x + if (this != &temp) { + reverse_comms_flag = temp.reverse_comms_flag; + combine_mode_ = temp.combine_mode_; + destination_vector_ = temp.destination_vector_; + source_vector_ = temp.source_vector_; + } + + return *this; +} + +//perform comms +template +void TpetraLRCommunicationPlan::execute_comms(){ + if(reverse_comms_flag){ + destination_vector_.tpetra_vector->doExport(*source_vector_.tpetra_vector, *exporter, Tpetra::INSERT, true); + } + else{ + destination_vector_.tpetra_vector->doImport(*source_vector_.tpetra_vector, *importer, Tpetra::INSERT); + } +} + +template +KOKKOS_INLINE_FUNCTION 
+TpetraLRCommunicationPlan::~TpetraLRCommunicationPlan() {} + +//////////////////////////////////////////////////////////////////////////////// +// End of TpetraLRCommunicationPlan +//////////////////////////////////////////////////////////////////////////////// + } // end namespace #endif // end if have MPI
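
Usage note: after this patch, TpetraPartitionMap is no longer templated on the global-index type (it is fixed to long long int internally), so call sites construct it as TpetraPartitionMap<>. The sketch below condenses the pattern the updated examples follow; the element type real_t = double, the DCArrayKokkos<long long int> index type, the include names, and the surrounding MPI/Kokkos initialization are assumptions, not part of the patch.

// Minimal sketch (assumed headers and types; not part of the patch).
#include "matar.h"                 // assumed MATAR umbrella header
#include "tpetra_wrapper_types.h"  // header modified by this patch

using namespace mtr;
using real_t = double;             // assumed element type

// Build an "all indices" map plus a uniquely decomposed map, then set up
// owned-subset comms on a distributed array, as in ann_distributed_crs.cpp.
void demo_partition_and_comms(size_t num_global)
{
    // Global indices stored as long long int, the fixed global-ordinal type.
    DCArrayKokkos<long long int> all_indices(num_global);
    FOR_ALL(i, 0, num_global, {
        all_indices(i) = i;
    });
    all_indices.update_host();

    TpetraPartitionMap<> full_pmap(all_indices);   // every index on every rank
    TpetraPartitionMap<> unique_pmap(num_global);  // contiguous unique decomposition

    TpetraDCArray<real_t> values(full_pmap);       // rows distributed via full_pmap
    // The array communicates from the contiguous, uniquely owned subset of its data.
    values.own_comm_setup(unique_pmap);

    // Fill local entries on device, then communicate owned values to the full map.
    FOR_ALL(i, 0, values.size(), {
        values(i) = 1.0;
    });
    values.perform_comms();
}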
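
A companion sketch for the renamed communication-plan type: TpetraLRCommunicationPlan (formerly TpetraCommunicationPlan) moves data between two arrays with different partition maps, taking the destination first and the source second. Here the source is a subview of an array that has already called own_comm_setup(), mirroring the tail of examples/ann_distributed_crs.cpp; the explicit <real_t> template arguments and the helper-function name are assumptions.

// Minimal sketch (same assumed headers/types as above; not part of the patch).
void demo_comm_plan(TpetraDCArray<real_t> &source_full, size_t num_global)
{
    // Subview of the uniquely owned, contiguous subset of source_full; requires
    // that source_full.own_comm_setup(...) has already been called.
    TpetraDCArray<real_t> source_sub(source_full, source_full.comm_pmap,
                                     source_full.comm_pmap.getMinGlobalIndex());

    // Destination distributed with an independent (e.g. repartitioned) map.
    TpetraPartitionMap<> destination_map(num_global);
    TpetraDCArray<real_t> destination(destination_map, "destination values");

    // Construct the plan once (setup is expensive) and reuse execute_comms().
    TpetraLRCommunicationPlan<real_t> comms(destination, source_sub);
    comms.execute_comms();
    destination.print();
}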