From 96850ae470e0476ac69a13e50a154144dee55223 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 24 Nov 2017 17:07:33 +0100 Subject: [PATCH 001/150] initial commit to check fork --- tools/install_boost.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/install_boost.sh b/tools/install_boost.sh index a803778..4f30ddf 100755 --- a/tools/install_boost.sh +++ b/tools/install_boost.sh @@ -33,14 +33,14 @@ # http://www.boost.org/more/getting_started/unix-variants.html#prepare-to-use-a-boost-library-binary # https://software.intel.com/en-us/articles/building-the-boost-library-to-run-natively-on-intelr-xeon-phitm-coprocessor -DOWNLOAD_PATH=$HOME/Downloads -INSTALL_PATH=$HOME/Software +DOWNLOAD_PATH=$HOME/boost/ +INSTALL_PATH=$HOME/software NO_MIC=false # set to true, to disable building Boost for Xeon Phi BASHRC_FILE=$HOME/.bashrc # set to /dev/null to disable, or to any other file to manually merge the needed changes into your .bashrc BOOST_BUILD_OPTIONS="-j8" # concurrent build with up to 8 commands BOOST_NAME=boost -BOOST_VERSION=1_56_0 +BOOST_VERSION=1_65_1 BOOST_MIC_SUFFIX=mic BOOST_ARCHIVE=${BOOST_NAME}_${BOOST_VERSION} # NOTE: without tar.bz2 From 359a6cb8f267b084c1fd6d24f397738a076da230 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 24 Nov 2017 17:07:33 +0100 Subject: [PATCH 002/150] initial commit to check fork --- tools/install_boost.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/install_boost.sh b/tools/install_boost.sh index a803778..4f30ddf 100755 --- a/tools/install_boost.sh +++ b/tools/install_boost.sh @@ -33,14 +33,14 @@ # http://www.boost.org/more/getting_started/unix-variants.html#prepare-to-use-a-boost-library-binary # https://software.intel.com/en-us/articles/building-the-boost-library-to-run-natively-on-intelr-xeon-phitm-coprocessor -DOWNLOAD_PATH=$HOME/Downloads -INSTALL_PATH=$HOME/Software +DOWNLOAD_PATH=$HOME/boost/ +INSTALL_PATH=$HOME/software NO_MIC=false # set to true, to 
disable building Boost for Xeon Phi BASHRC_FILE=$HOME/.bashrc # set to /dev/null to disable, or to any other file to manually merge the needed changes into your .bashrc BOOST_BUILD_OPTIONS="-j8" # concurrent build with up to 8 commands BOOST_NAME=boost -BOOST_VERSION=1_56_0 +BOOST_VERSION=1_65_1 BOOST_MIC_SUFFIX=mic BOOST_ARCHIVE=${BOOST_NAME}_${BOOST_VERSION} # NOTE: without tar.bz2 From a03e64186bec99a8fb97a8b53c6305b62ea0b79b Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 14 Dec 2017 17:11:48 +0100 Subject: [PATCH 003/150] tds changes --- .gitignore | 1 + Jamroot | 4 ++++ include/ham/misc/migratable.hpp | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index db5131f..f83f745 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ bin/* benchmark/results/* +cmake-build-debug/* diff --git a/Jamroot b/Jamroot index f4bb375..39f7e78 100644 --- a/Jamroot +++ b/Jamroot @@ -116,6 +116,10 @@ exe active_msgs : active_msgs.cpp ; +exe active_msgs_over_file + : active_msgs_over_file.cpp boost_program_options + ; + exe ham_offload : ham_offload.cpp ham_offload_scif boost_program_options # : /mpi//mpi HAM_COMM_MPI diff --git a/include/ham/misc/migratable.hpp b/include/ham/misc/migratable.hpp index 012a99e..9ed002e 100644 --- a/include/ham/misc/migratable.hpp +++ b/include/ham/misc/migratable.hpp @@ -28,12 +28,12 @@ class migratable //: value(std::forward(arg)) // NOTE: compatible types are allowed : value(std::forward(arg)) // NOTE: compatible types are allowed { -// std::cout << "migratable-ctor: " << value << std::endl; + std::cout << "migratable-ctor: " << value << std::endl; } operator const T& () const { -// std::cout << "migratable-conversion: " << value << std::endl; + std::cout << "migratable-conversion: " << value << std::endl; return value; } private: From 9a1a2b41209c1ee836988b3936850a1ae23f5e67 Mon Sep 17 00:00:00 2001 From: Deppisch Date: Thu, 14 Dec 2017 17:11:48 +0100 Subject: [PATCH 004/150] tds changes --- 
.gitignore | 1 + Jamroot | 4 ++++ include/ham/misc/migratable.hpp | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index db5131f..f83f745 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ bin/* benchmark/results/* +cmake-build-debug/* diff --git a/Jamroot b/Jamroot index f4bb375..39f7e78 100644 --- a/Jamroot +++ b/Jamroot @@ -116,6 +116,10 @@ exe active_msgs : active_msgs.cpp ; +exe active_msgs_over_file + : active_msgs_over_file.cpp boost_program_options + ; + exe ham_offload : ham_offload.cpp ham_offload_scif boost_program_options # : /mpi//mpi HAM_COMM_MPI diff --git a/include/ham/misc/migratable.hpp b/include/ham/misc/migratable.hpp index 012a99e..9ed002e 100644 --- a/include/ham/misc/migratable.hpp +++ b/include/ham/misc/migratable.hpp @@ -28,12 +28,12 @@ class migratable //: value(std::forward(arg)) // NOTE: compatible types are allowed : value(std::forward(arg)) // NOTE: compatible types are allowed { -// std::cout << "migratable-ctor: " << value << std::endl; + std::cout << "migratable-ctor: " << value << std::endl; } operator const T& () const { -// std::cout << "migratable-conversion: " << value << std::endl; + std::cout << "migratable-conversion: " << value << std::endl; return value; } private: From 9d579d5ea6b1ece605e7309d657ed1b33c2d28df Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 14 Dec 2017 18:28:18 +0100 Subject: [PATCH 005/150] added actives_msgs_over_file.cpp --- src/active_msgs_over_file.cpp | 195 ++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 src/active_msgs_over_file.cpp diff --git a/src/active_msgs_over_file.cpp b/src/active_msgs_over_file.cpp new file mode 100644 index 0000000..9b3cc4e --- /dev/null +++ b/src/active_msgs_over_file.cpp @@ -0,0 +1,195 @@ +// modified by Daniel Deppisch (deppisch@zib.de) from: +// active_msgs.cpp +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// Distributed under the Boost Software 
License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include "ham/msg/active_msg_base.hpp" +#include "ham/msg/execution_policy.hpp" +#include "ham/msg/active_msg.hpp" +#include "ham/misc/migratable.hpp" + +using namespace std; + + +namespace ham { + + template<> + class migratable { + public: + migratable(const migratable &) = default; + + migratable(migratable &&) = default; + + migratable &operator=(const migratable &) = default; + + migratable &operator=(migratable &&) = default; + + // forward compatible arg into T's ctor + template + migratable(Compatible &&arg) { + std::cout << "migratable-ctor: " << arg << std::endl; + std::strcpy(value, arg.c_str()); + } + + operator std::string() const { + std::cout << "migratable-conversion: " << value << std::endl; + return value; + } + + private: + char value[256]; + }; +} // namespace ham + +// a simple message type for testing + +class MsgA : public ham::msg::active_msg { +public: + void operator()() { + cout << "MsgA::operator() successfully called." << endl; + // the message could perform some task here + // and possible send back a result afterwards, e.g. by + // - using data transferred as member inside the message + // - calling some communication layer + // - ... + } + + // the message could include members that are safe to transfer between the communicating entities +}; + +class MsgB : public ham::msg::active_msg { +public: + MsgB(const char* t_in, std::string text2) + : text2(text2) + { + std::strcpy(text, t_in); + } + + void operator()() { + cout << "MsgB::operator() successfully called." << endl; + cout << "Text: " << text << endl; + cout << "Text2: " << static_cast(text2) << endl; + // the message could perform some task here + // and possible send back a result afterwards, e.g. by + // - using data transferred as member inside the message + // - calling some communication layer + // - ... 
+ } + // the message could include members that are safe to transfer between the communicating entities +private: + char text[256]; + ham::migratable text2; +}; + +// a simple test which simulates a communication channel via filesystem +// of course, this does NOT test the communication backend +// this may be used to write and read a message from filesystem to simulate communication between different binaries without a supported backend + +// write message to file and shut down +template +bool write_active_msg(Msg& func, std::string const & filename) +{ + size_t msgSize = sizeof(func); + + + std::ofstream b_stream(filename.c_str(), std::fstream::out | std::fstream::binary); + + if (b_stream) { + b_stream.write(reinterpret_cast(&func), msgSize); + return (b_stream.good()); + } + + return false; +} + +// read message from file and execute +bool read_active_msg(std::string const & filename) +{ + std::ifstream b_stream(filename.c_str(), std::fstream::in | std::fstream::binary); + b_stream.seekg(0, ios::end); + int bufferSize = b_stream.tellg(); + char* buffer = new char[bufferSize]; + b_stream.seekg(0, ios::beg); + + if (!b_stream.read(buffer, bufferSize)) { + cout << "ERROR: reading file " << filename << " failed" << endl; + return false; + } + + // simulate reading from the channel, thereby we cast the buffer back to the known base class of all active messages + auto functor = *reinterpret_cast(buffer); + + // This is where the magick happens. + // Calling the buffer as an active_msg_base functor with the receive buffer + // as argument triggers a handler look-up, followed by the execution of + // that handler (which is defined by the execution policy of the actual + // message type). The handler can perform a safe upcast of the buffer to + // the actual type of the message and directly execute it as functor, + // enqeue it somewhere for further processing, or whatever a policy + // specifies. 
+ functor(buffer); + + delete [] buffer; + + return true; +} + + +int main (int argc, char * argv[]) { + + // initialise active message handler address conversion data + ham::msg::msg_handler_registry::init(); + + // print message registry data + ham::msg::msg_handler_registry::print_handler_map(std::cout); // generated at static-init-time + ham::msg::msg_handler_registry::print_handler_vector(std::cout); // generated by the init-call above + + + + // filename to be used + std::string filename; + std::string text; + + // command line handling + boost::program_options::options_description desc("Options"); + desc.add_options() + ("file,f", boost::program_options::value(&filename), "specify file name (default: \"msgfile\"") + ("write,w", "make this process write a message to file") + ("read,r", "make this process read a message from file") + ("help,h", "print this help information") + ("text,t", boost::program_options::value(&text), "add some text to display when executing message"); + + boost::program_options::variables_map vm; + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run(), vm); + boost::program_options::notify(vm); + + if (!vm.count("file")) { + filename = "msgfile"; + } + + // simple message type + MsgA fA; + // extended message type + MsgB fB(text.c_str(), "asdfasdasd"); + + if(vm.count("write")) { + if(vm.count("text")) { + write_active_msg(fB, filename); + } else { + write_active_msg(fA, filename); + } + } else if (vm.count("read")) { + read_active_msg(filename); + } else { + cout << "ERROR: did not specify whether process should write or read." 
<< endl; + } + + return 0; +} + From 0a3a2c23aabc91fba56ebcdfa94994f13b175feb Mon Sep 17 00:00:00 2001 From: Deppisch Date: Thu, 14 Dec 2017 18:28:18 +0100 Subject: [PATCH 006/150] added actives_msgs_over_file.cpp --- src/active_msgs_over_file.cpp | 195 ++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 src/active_msgs_over_file.cpp diff --git a/src/active_msgs_over_file.cpp b/src/active_msgs_over_file.cpp new file mode 100644 index 0000000..9b3cc4e --- /dev/null +++ b/src/active_msgs_over_file.cpp @@ -0,0 +1,195 @@ +// modified by Daniel Deppisch (deppisch@zib.de) from: +// active_msgs.cpp +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include "ham/msg/active_msg_base.hpp" +#include "ham/msg/execution_policy.hpp" +#include "ham/msg/active_msg.hpp" +#include "ham/misc/migratable.hpp" + +using namespace std; + + +namespace ham { + + template<> + class migratable { + public: + migratable(const migratable &) = default; + + migratable(migratable &&) = default; + + migratable &operator=(const migratable &) = default; + + migratable &operator=(migratable &&) = default; + + // forward compatible arg into T's ctor + template + migratable(Compatible &&arg) { + std::cout << "migratable-ctor: " << arg << std::endl; + std::strcpy(value, arg.c_str()); + } + + operator std::string() const { + std::cout << "migratable-conversion: " << value << std::endl; + return value; + } + + private: + char value[256]; + }; +} // namespace ham + +// a simple message type for testing + +class MsgA : public ham::msg::active_msg { +public: + void operator()() { + cout << "MsgA::operator() successfully called." << endl; + // the message could perform some task here + // and possible send back a result afterwards, e.g. 
by + // - using data transferred as member inside the message + // - calling some communication layer + // - ... + } + + // the message could include members that are safe to transfer between the communicating entities +}; + +class MsgB : public ham::msg::active_msg { +public: + MsgB(const char* t_in, std::string text2) + : text2(text2) + { + std::strcpy(text, t_in); + } + + void operator()() { + cout << "MsgB::operator() successfully called." << endl; + cout << "Text: " << text << endl; + cout << "Text2: " << static_cast(text2) << endl; + // the message could perform some task here + // and possible send back a result afterwards, e.g. by + // - using data transferred as member inside the message + // - calling some communication layer + // - ... + } + // the message could include members that are safe to transfer between the communicating entities +private: + char text[256]; + ham::migratable text2; +}; + +// a simple test which simulates a communication channel via filesystem +// of course, this does NOT test the communication backend +// this may be used to write and read a message from filesystem to simulate communication between different binaries without a supported backend + +// write message to file and shut down +template +bool write_active_msg(Msg& func, std::string const & filename) +{ + size_t msgSize = sizeof(func); + + + std::ofstream b_stream(filename.c_str(), std::fstream::out | std::fstream::binary); + + if (b_stream) { + b_stream.write(reinterpret_cast(&func), msgSize); + return (b_stream.good()); + } + + return false; +} + +// read message from file and execute +bool read_active_msg(std::string const & filename) +{ + std::ifstream b_stream(filename.c_str(), std::fstream::in | std::fstream::binary); + b_stream.seekg(0, ios::end); + int bufferSize = b_stream.tellg(); + char* buffer = new char[bufferSize]; + b_stream.seekg(0, ios::beg); + + if (!b_stream.read(buffer, bufferSize)) { + cout << "ERROR: reading file " << filename << " failed" << endl; + 
return false; + } + + // simulate reading from the channel, thereby we cast the buffer back to the known base class of all active messages + auto functor = *reinterpret_cast(buffer); + + // This is where the magick happens. + // Calling the buffer as an active_msg_base functor with the receive buffer + // as argument triggers a handler look-up, followed by the execution of + // that handler (which is defined by the execution policy of the actual + // message type). The handler can perform a safe upcast of the buffer to + // the actual type of the message and directly execute it as functor, + // enqeue it somewhere for further processing, or whatever a policy + // specifies. + functor(buffer); + + delete [] buffer; + + return true; +} + + +int main (int argc, char * argv[]) { + + // initialise active message handler address conversion data + ham::msg::msg_handler_registry::init(); + + // print message registry data + ham::msg::msg_handler_registry::print_handler_map(std::cout); // generated at static-init-time + ham::msg::msg_handler_registry::print_handler_vector(std::cout); // generated by the init-call above + + + + // filename to be used + std::string filename; + std::string text; + + // command line handling + boost::program_options::options_description desc("Options"); + desc.add_options() + ("file,f", boost::program_options::value(&filename), "specify file name (default: \"msgfile\"") + ("write,w", "make this process write a message to file") + ("read,r", "make this process read a message from file") + ("help,h", "print this help information") + ("text,t", boost::program_options::value(&text), "add some text to display when executing message"); + + boost::program_options::variables_map vm; + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run(), vm); + boost::program_options::notify(vm); + + if (!vm.count("file")) { + filename = "msgfile"; + } + + // simple message type + MsgA fA; + // 
extended message type + MsgB fB(text.c_str(), "asdfasdasd"); + + if(vm.count("write")) { + if(vm.count("text")) { + write_active_msg(fB, filename); + } else { + write_active_msg(fA, filename); + } + } else if (vm.count("read")) { + read_active_msg(filename); + } else { + cout << "ERROR: did not specify whether process should write or read." << endl; + } + + return 0; +} + From 1b35b8fde8df8f10e5b35883d3271f1472345e83 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Tue, 27 Mar 2018 14:25:05 +0200 Subject: [PATCH 007/150] jamroot switch scif to mpi, uncomment ctor output from migratable --- Jamroot | 6 +++--- include/ham/misc/migratable.hpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Jamroot b/Jamroot index 39f7e78..1d27769 100644 --- a/Jamroot +++ b/Jamroot @@ -121,9 +121,9 @@ exe active_msgs_over_file ; exe ham_offload - : ham_offload.cpp ham_offload_scif boost_program_options -# : /mpi//mpi HAM_COMM_MPI - : scif HAM_COMM_SCIF + : ham_offload.cpp ham_offload_mpi boost_program_options + : /mpi//mpi HAM_COMM_MPI +# : scif HAM_COMM_SCIF ; exe ham_offload_explicit diff --git a/include/ham/misc/migratable.hpp b/include/ham/misc/migratable.hpp index 9ed002e..0a31b42 100644 --- a/include/ham/misc/migratable.hpp +++ b/include/ham/misc/migratable.hpp @@ -28,12 +28,12 @@ class migratable //: value(std::forward(arg)) // NOTE: compatible types are allowed : value(std::forward(arg)) // NOTE: compatible types are allowed { - std::cout << "migratable-ctor: " << value << std::endl; + // std::cout << "migratable-ctor: " << value << std::endl; } operator const T& () const { - std::cout << "migratable-conversion: " << value << std::endl; + // std::cout << "migratable-conversion: " << value << std::endl; return value; } private: From fdce48fd44d2339bb947c381069fa79c0b00907a Mon Sep 17 00:00:00 2001 From: bemdeppi Date: Tue, 27 Mar 2018 14:25:05 +0200 Subject: [PATCH 008/150] jamroot switch scif to mpi, uncomment ctor output from migratable --- Jamroot | 6 
+++--- include/ham/misc/migratable.hpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Jamroot b/Jamroot index 39f7e78..1d27769 100644 --- a/Jamroot +++ b/Jamroot @@ -121,9 +121,9 @@ exe active_msgs_over_file ; exe ham_offload - : ham_offload.cpp ham_offload_scif boost_program_options -# : /mpi//mpi HAM_COMM_MPI - : scif HAM_COMM_SCIF + : ham_offload.cpp ham_offload_mpi boost_program_options + : /mpi//mpi HAM_COMM_MPI +# : scif HAM_COMM_SCIF ; exe ham_offload_explicit diff --git a/include/ham/misc/migratable.hpp b/include/ham/misc/migratable.hpp index 9ed002e..0a31b42 100644 --- a/include/ham/misc/migratable.hpp +++ b/include/ham/misc/migratable.hpp @@ -28,12 +28,12 @@ class migratable //: value(std::forward(arg)) // NOTE: compatible types are allowed : value(std::forward(arg)) // NOTE: compatible types are allowed { - std::cout << "migratable-ctor: " << value << std::endl; + // std::cout << "migratable-ctor: " << value << std::endl; } operator const T& () const { - std::cout << "migratable-conversion: " << value << std::endl; + // std::cout << "migratable-conversion: " << value << std::endl; return value; } private: From 34848b591d15db0a92a1ada857dccb64a02f76d0 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 29 Mar 2018 13:40:58 +0200 Subject: [PATCH 009/150] initial commit of mpi_rma_dynamic prototype --- include/ham/offload/offload.hpp | 18 +++++++++++++++ include/ham/offload/offload_msg.hpp | 34 ++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index a84d338..a3cff70 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -235,6 +235,11 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) return result; #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending + future result(comm.allocate_request(remote_dest.node())); + HAM_DEBUG( HAM_LOG << "offload::put(): 
initiating RMA put..." << std::endl; ) + comm.send_data_async(result.get_request(), local_source, remote_dest, n); +#endif } template @@ -268,6 +273,11 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) return result; #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending + future result(comm.allocate_request(remote_dest.node())); + HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." << std::endl; ) + comm.recv_data_async(result.get_request(), remote_source, local_dest, n); +#endif } template @@ -328,6 +338,14 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) read_result.get(); write_result.get(); #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending + future result(comm.allocate_request(source.node())); + HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) + SEND READ_MSG to source (maybe introduce new copy_msg) + MAKE SURE there is no winlock on dest from host + + comm.send_data_async(result.get_request(), local_source, remote_dest, n); +#endif } #endif diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index c42ffb8..845dd08 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -6,6 +6,7 @@ #ifndef ham_offload_offload_msg_hpp #define ham_offload_offload_msg_hpp +#include #include "ham/msg/active_msg.hpp" #include "ham/msg/execution_policy.hpp" #include "ham/misc/constants.hpp" @@ -80,6 +81,7 @@ class offload_msg } }; +// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_write_msg : public active_msg, ExecutionPolicy> @@ -106,6 +108,7 @@ class offload_write_msg }; +// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_read_msg : public 
active_msg, ExecutionPolicy> @@ -116,7 +119,7 @@ class offload_read_msg void operator()() //const { - communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. + communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. // send a result message to tell the sender, that the transfer is done if (req.valid()) { @@ -131,6 +134,35 @@ class offload_read_msg size_t n; }; + +// TODO(daniel, high priority): implement offload_copy_msg, copy with one-sided rma needs a msg containing ptrs for source+target +//#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending + template class ExecutionPolicy = default_execution_policy> + class offload_rma_copy_msg + : public active_msg, ExecutionPolicy> + { + public: + offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr,T* local_source, size_t n) + : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } + + void operator()() //const + { + communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. + + // send a result message to tell the sender, that the transfer is done + if (req.valid()) { + req.send_result((void*)&n, sizeof n); + } + } + private: + communicator::request req; // TODO(improvement, high priority): use a subset of req here! 
+ + node_t remote_node; + MPI_Aint remote_addr; + T* local_source; + size_t n; + }; +//#endif } // namespace detail } // namespace offload } // namespace ham From e5c46ed4953df083d8d3e7c9e7edb1aa9259e77f Mon Sep 17 00:00:00 2001 From: Deppisch Date: Thu, 29 Mar 2018 13:40:58 +0200 Subject: [PATCH 010/150] initial commit of mpi_rma_dynamic prototype --- include/ham/offload/offload.hpp | 18 +++++++++++++++ include/ham/offload/offload_msg.hpp | 34 ++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index a84d338..a3cff70 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -235,6 +235,11 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) return result; #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending + future result(comm.allocate_request(remote_dest.node())); + HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." << std::endl; ) + comm.send_data_async(result.get_request(), local_source, remote_dest, n); +#endif } template @@ -268,6 +273,11 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) return result; #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending + future result(comm.allocate_request(remote_dest.node())); + HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." 
<< std::endl; ) + comm.recv_data_async(result.get_request(), remote_source, local_dest, n); +#endif } template @@ -328,6 +338,14 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) read_result.get(); write_result.get(); #endif +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending + future result(comm.allocate_request(source.node())); + HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) + SEND READ_MSG to source (maybe introduce new copy_msg) + MAKE SURE there is no winlock on dest from host + + comm.send_data_async(result.get_request(), local_source, remote_dest, n); +#endif } #endif diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index c42ffb8..845dd08 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -6,6 +6,7 @@ #ifndef ham_offload_offload_msg_hpp #define ham_offload_offload_msg_hpp +#include #include "ham/msg/active_msg.hpp" #include "ham/msg/execution_policy.hpp" #include "ham/misc/constants.hpp" @@ -80,6 +81,7 @@ class offload_msg } }; +// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_write_msg : public active_msg, ExecutionPolicy> @@ -106,6 +108,7 @@ class offload_write_msg }; +// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_read_msg : public active_msg, ExecutionPolicy> @@ -116,7 +119,7 @@ class offload_read_msg void operator()() //const { - communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. 
+ communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. // send a result message to tell the sender, that the transfer is done if (req.valid()) { @@ -131,6 +134,35 @@ class offload_read_msg size_t n; }; + +// TODO(daniel, high priority): implement offload_copy_msg, copy with one-sided rma needs a msg containing ptrs for source+target +//#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending + template class ExecutionPolicy = default_execution_policy> + class offload_rma_copy_msg + : public active_msg, ExecutionPolicy> + { + public: + offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr,T* local_source, size_t n) + : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } + + void operator()() //const + { + communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. + + // send a result message to tell the sender, that the transfer is done + if (req.valid()) { + req.send_result((void*)&n, sizeof n); + } + } + private: + communicator::request req; // TODO(improvement, high priority): use a subset of req here! 
+ + node_t remote_node; + MPI_Aint remote_addr; + T* local_source; + size_t n; + }; +//#endif } // namespace detail } // namespace offload } // namespace ham From da9c4e964437c864dabc364660a4b36e79888b69 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 29 Mar 2018 13:43:49 +0200 Subject: [PATCH 011/150] initial commit of mpi_rma_dynamic prototype --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 368 ++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 include/ham/net/communicator_mpi_rma_dynamic.hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp new file mode 100644 index 0000000..d1b1add --- /dev/null +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -0,0 +1,368 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef ham_net_communicator_mpi_hpp +#define ham_net_communicator_mpi_hpp + +#include + +#include +#include // memcpy +#include // posix_memalign + +#include "ham/misc/constants.hpp" +#include "ham/misc/resource_pool.hpp" +#include "ham/misc/types.hpp" +#include "ham/util/debug.hpp" +#include "ham/util/log.hpp" + +namespace ham { +namespace net { + +template +class buffer_ptr { +public: + buffer_ptr(); + buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node), mpi_address_(0) { } + buffer_ptr(T* ptr, node_t node, MPI_Aint mpi_address) : ptr_(ptr), node_(node), mpi_address_(mpi_address) { } + + + T* get() { return ptr_; } + node_t node() { return node_; } + MPI_Aint get_mpi_address() { return mpi_address_; } + + // element access + T& operator [] (size_t i); + + // basic pointer arithmetic to address sub-buffers + buffer_ptr operator+(size_t off) + { + return buffer_ptr(ptr_ + off, node_); + } + +private: + T* ptr_; + node_t node_; + MPI_Aint mpi_address_; +}; + +class 
node_descriptor +{ +public: + //node_descriptor() : name(MPI_MAX_PROCESSOR_NAME, 0) {} + + //const std::string& name() const { return name_; } + const char* name() const { return name_; } +private: + //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable + char name_[MPI_MAX_PROCESSOR_NAME + 1]; + + friend class net::communicator; +}; + +class communicator { +public: + // externally used interface of request must be shared across all communicator-implementations + class request { + public: + request() : valid_(false) {} // instantiate invalid + + request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) + : target_node(target_node), source_node(source_node), valid_(true), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0) + {} + + // return true if request was finished + // will not work as intended for rma ops, no equivalent to test() available for remote completion + bool test() + { + int flag = 0; + MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + + if(uses_rma) + { + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) + } + + return flag != 0; + } + + void* get() // blocks + { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + if(uses_rma) + { + MPI_Win_unlock(target_node, rma_win); + } + return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); + } + + template + void send_result(T* result_msg, size_t size) + { + assert(communicator::this_node() == target_node); // 
this assert fails if send_result is called from the wrong side + + // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else + MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + //communicator::instance().send_msg(source_node, source_buffer_index, NO_BUFFER_INDEX, result_msg, size); + } + + bool valid() const + { + return valid_; + } + + bool uses_rma() const + { + return uses_rma_; + } + + MPI_Request& next_mpi_request() + { + HAM_DEBUG( HAM_LOG << "next_mpi_request(): this=" << this << ", req_count=" << req_count << ", NUM_REQUESTS=" << NUM_REQUESTS << std::endl; ) + assert(req_count < NUM_REQUESTS); + return mpi_reqs[req_count++]; // NOTE: post-increment + } + + node_t target_node; + node_t source_node; + bool valid_; + bool uses_rma; + + // only needed by the sender + enum { NUM_REQUESTS = 3 }; + + size_t send_buffer_index; // buffer to use for sending the message + size_t recv_buffer_index; // buffer to use for receiving the result + size_t req_count; + + private: + MPI_Request mpi_reqs[NUM_REQUESTS]; // for sending the msg, receiving the result, and an associated data transfer + }; // class request + + typedef request& request_reference_type; + typedef const request& request_const_reference_type; + + communicator(int argc, char* argv[]) + { + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI" << std::endl; ) + + instance_ = this; + int p; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &p); + if (p != MPI_THREAD_MULTIPLE) + { + std::cerr << "Could not initialise MPI with MPI_THREAD_MULTIPLE, MPI_Init_thread() returned " << p << std::endl; + } + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI ..." 
<< std::endl; ) + + int t; + MPI_Comm_rank(MPI_COMM_WORLD, &t); + this_node_ = t; + MPI_Comm_size(MPI_COMM_WORLD, &t); + nodes_ = t; + host_node_ = 0; // TODO(improvement): make configureable, like for SCIF + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &rma_win); + + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI done" << std::endl; ) + + peers = new mpi_peer[nodes_]; + + // start of node descriptor code: + node_descriptions.resize(nodes_); + + // build own node descriptor + node_descriptor node_description; + int count; + MPI_Get_processor_name(node_description.name_, &count); + node_description.name_[count] = 0x0; // null terminate + +// char hostname[MPI_MAX_PROCESSOR_NAME + 1]; +// MPI_Get_processor_name(hostname, &count); +// hostname[count] = 0x0; // null terminate +// node_description.name_.assign(hostname, count); + + // append rank for testing: + //node_description.name_[count] = 48 + this_node_; + //node_description.name_[count+1] = 0x0; + + // communicate descriptors between nodes + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions" << std::endl; ) + //MPI_Alltoall(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); + MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) + + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j-1); + } + } + } + } + + ~communicator() + { + MPI_Finalize(); // TODO(improvement): check on error and 
create output if there was one + HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) + } + + + request allocate_request(node_t remote_node) + { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + + const size_t send_buffer_index = peers[remote_node].buffer_pool.allocate(); + const size_t recv_buffer_index = peers[remote_node].buffer_pool.allocate(); + + return { remote_node, this_node_, send_buffer_index, recv_buffer_index }; + } + + void free_request(request& req) + { + assert(req.valid()); + assert(req.source_node == this_node_); + + mpi_peer& peer = peers[req.target_node]; + + peer.buffer_pool.free(req.send_buffer_index); + peer.buffer_pool.free(req.recv_buffer_index); + req.valid_ = false; + } + +public: + void send_msg(request_reference_type req, void* msg, size_t size) + { + // copy message from caller into transfer buffer + void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); + memcpy(msg_buffer, msg, size); + MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + } + + // to be used by the offload target's main loop: synchronously receive one message at a time + // NOTE: the local static receive buffer! + void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) + { + static msg_buffer buffer; // NOTE ! 
+ MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + return static_cast(&buffer); + } + + // trigger receiving the result of a message on the sending side + void recv_result(request_reference_type req) + { + // nothing todo here, since this communicator implementation uses one-sided communication + // the data is already where it is expected (in the buffer referenced in req) + MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + return; + } + + template + void send_data(T* local_source, buffer_ptr remote_dest, size_t size) + { + //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Put(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); + MPI_Win_unlock(remote_dest.node(), rma_win); + } + + // to be used by the host + template + void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) + { + //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + req.uses_rma = true; + + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Rput(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &re.next_mpi_request()); + } + + + template + void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) + { + //MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Get(remote_source, size, MPI_BYTE, remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, 
rma_win); + MPI_Win_flush(remote_source.node(), rma_win); + } + + // to be used by the host + template + void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) + { + //MPI_Irecv(static_cast(local_dest), size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + req.uses_rma = true; + MPI_RGet(remote_source, size, MPI_BYTE, remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &req.next_mpi_request()); + } + + template + buffer_ptr allocate_buffer(const size_t n, node_t source_node) + { + T* ptr; + //int err = + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + MPI_Aint mpi_address; + MPI_Win_attach(rma_win, (void *) &mpi_address, n * sizeof(T)); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_, mpi_address); + } + + template + void free_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + MPI_Win_detach(rma_win, ptr.get()); + free(static_cast(ptr.get())); + } + + static communicator& instance() { return *instance_; } + static node_t this_node() { return instance().this_node_; } + static size_t num_nodes() { return instance().nodes_; } + bool is_host() { return this_node_ == 0; } // TODO(improvement): ham_address == ham_host_address ; } + bool is_host(node_t node) { return node == 0; } // TODO(improvement): node == ham_host_address; } + + static const node_descriptor& get_node_description(node_t node) + { + return instance().node_descriptions[node]; + } + +private: + static communicator* instance_; + node_t this_node_; + size_t nodes_; + node_t host_node_; + std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions + MPI_Win rma_win; // globally shared dynamic window for rma ops + + struct mpi_peer { + buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender + + // 
needed by sender to manage which buffers are in use and which are free + // just manages indices, that can be used by + detail::resource_pool buffer_pool; + }; + + mpi_peer* peers; +}; + +template +buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } + +template +T& buffer_ptr::operator[](size_t i) +{ + assert(node_ == communicator::this_node()); + return ptr_[i]; +} + +} // namespace net +} // namespace ham + +#endif // ham_net_communicator_mpi_hpp From 590fa6a6bb46a75c2a85826614562b68e7fbe421 Mon Sep 17 00:00:00 2001 From: Deppisch Date: Thu, 29 Mar 2018 13:43:49 +0200 Subject: [PATCH 012/150] initial commit of mpi_rma_dynamic prototype --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 368 ++++++++++++++++++ 1 file changed, 368 insertions(+) create mode 100644 include/ham/net/communicator_mpi_rma_dynamic.hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp new file mode 100644 index 0000000..d1b1add --- /dev/null +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -0,0 +1,368 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef ham_net_communicator_mpi_hpp +#define ham_net_communicator_mpi_hpp + +#include + +#include +#include // memcpy +#include // posix_memalign + +#include "ham/misc/constants.hpp" +#include "ham/misc/resource_pool.hpp" +#include "ham/misc/types.hpp" +#include "ham/util/debug.hpp" +#include "ham/util/log.hpp" + +namespace ham { +namespace net { + +template +class buffer_ptr { +public: + buffer_ptr(); + buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node), mpi_address_(0) { } + buffer_ptr(T* ptr, node_t node, MPI_Aint mpi_address) : ptr_(ptr), node_(node), mpi_address_(mpi_address) { } + + + T* get() { return ptr_; } + node_t node() { return node_; } + MPI_Aint get_mpi_address() { return mpi_address_; } + + // element access + T& operator [] (size_t i); + + // basic pointer arithmetic to address sub-buffers + buffer_ptr operator+(size_t off) + { + return buffer_ptr(ptr_ + off, node_); + } + +private: + T* ptr_; + node_t node_; + MPI_Aint mpi_address_; +}; + +class node_descriptor +{ +public: + //node_descriptor() : name(MPI_MAX_PROCESSOR_NAME, 0) {} + + //const std::string& name() const { return name_; } + const char* name() const { return name_; } +private: + //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable + char name_[MPI_MAX_PROCESSOR_NAME + 1]; + + friend class net::communicator; +}; + +class communicator { +public: + // externally used interface of request must be shared across all communicator-implementations + class request { + public: + request() : valid_(false) {} // instantiate invalid + + request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) + : target_node(target_node), source_node(source_node), valid_(true), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0) + {} + + // return true if 
request was finished + // will not work as intended for rma ops, no equivalent to test() available for remote completion + bool test() + { + int flag = 0; + MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + + if(uses_rma) + { + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) + } + + return flag != 0; + } + + void* get() // blocks + { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + if(uses_rma) + { + MPI_Win_unlock(target_node, rma_win); + } + return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); + } + + template + void send_result(T* result_msg, size_t size) + { + assert(communicator::this_node() == target_node); // this assert fails if send_result is called from the wrong side + + // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else + MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + //communicator::instance().send_msg(source_node, source_buffer_index, NO_BUFFER_INDEX, result_msg, size); + } + + bool valid() const + { + return valid_; + } + + bool uses_rma() const + { + return uses_rma_; + } + + MPI_Request& next_mpi_request() + { + HAM_DEBUG( HAM_LOG << "next_mpi_request(): this=" << this << ", req_count=" << req_count << ", NUM_REQUESTS=" << NUM_REQUESTS << std::endl; ) + assert(req_count < NUM_REQUESTS); + return mpi_reqs[req_count++]; // NOTE: post-increment + } + + node_t target_node; + node_t source_node; + bool valid_; + bool uses_rma; + + // only needed by the sender + enum { NUM_REQUESTS = 3 }; + + size_t 
send_buffer_index; // buffer to use for sending the message + size_t recv_buffer_index; // buffer to use for receiving the result + size_t req_count; + + private: + MPI_Request mpi_reqs[NUM_REQUESTS]; // for sending the msg, receiving the result, and an associated data transfer + }; // class request + + typedef request& request_reference_type; + typedef const request& request_const_reference_type; + + communicator(int argc, char* argv[]) + { + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI" << std::endl; ) + + instance_ = this; + int p; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &p); + if (p != MPI_THREAD_MULTIPLE) + { + std::cerr << "Could not initialise MPI with MPI_THREAD_MULTIPLE, MPI_Init_thread() returned " << p << std::endl; + } + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI ..." << std::endl; ) + + int t; + MPI_Comm_rank(MPI_COMM_WORLD, &t); + this_node_ = t; + MPI_Comm_size(MPI_COMM_WORLD, &t); + nodes_ = t; + host_node_ = 0; // TODO(improvement): make configureable, like for SCIF + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &rma_win); + + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI done" << std::endl; ) + + peers = new mpi_peer[nodes_]; + + // start of node descriptor code: + node_descriptions.resize(nodes_); + + // build own node descriptor + node_descriptor node_description; + int count; + MPI_Get_processor_name(node_description.name_, &count); + node_description.name_[count] = 0x0; // null terminate + +// char hostname[MPI_MAX_PROCESSOR_NAME + 1]; +// MPI_Get_processor_name(hostname, &count); +// hostname[count] = 0x0; // null terminate +// node_description.name_.assign(hostname, count); + + // append rank for testing: + //node_description.name_[count] = 48 + this_node_; + //node_description.name_[count+1] = 0x0; + + // communicate descriptors between nodes + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions" << std::endl; 
) + //MPI_Alltoall(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); + MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) + + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j-1); + } + } + } + } + + ~communicator() + { + MPI_Finalize(); // TODO(improvement): check on error and create output if there was one + HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) + } + + + request allocate_request(node_t remote_node) + { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + + const size_t send_buffer_index = peers[remote_node].buffer_pool.allocate(); + const size_t recv_buffer_index = peers[remote_node].buffer_pool.allocate(); + + return { remote_node, this_node_, send_buffer_index, recv_buffer_index }; + } + + void free_request(request& req) + { + assert(req.valid()); + assert(req.source_node == this_node_); + + mpi_peer& peer = peers[req.target_node]; + + peer.buffer_pool.free(req.send_buffer_index); + peer.buffer_pool.free(req.recv_buffer_index); + req.valid_ = false; + } + +public: + void send_msg(request_reference_type req, void* msg, size_t size) + { + // copy message from caller into transfer buffer + void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); + memcpy(msg_buffer, msg, size); + MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, 
&req.next_mpi_request()); + } + + // to be used by the offload target's main loop: synchronously receive one message at a time + // NOTE: the local static receive buffer! + void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) + { + static msg_buffer buffer; // NOTE ! + MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + return static_cast(&buffer); + } + + // trigger receiving the result of a message on the sending side + void recv_result(request_reference_type req) + { + // nothing todo here, since this communicator implementation uses one-sided communication + // the data is already where it is expected (in the buffer referenced in req) + MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + return; + } + + template + void send_data(T* local_source, buffer_ptr remote_dest, size_t size) + { + //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Put(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); + MPI_Win_unlock(remote_dest.node(), rma_win); + } + + // to be used by the host + template + void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) + { + //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + req.uses_rma = true; + + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Rput(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &re.next_mpi_request()); + } + + + template + void recv_data(buffer_ptr remote_source, T* 
local_dest, size_t size) + { + //MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Get(remote_source, size, MPI_BYTE, remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); + MPI_Win_flush(remote_source.node(), rma_win); + } + + // to be used by the host + template + void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) + { + //MPI_Irecv(static_cast(local_dest), size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + req.uses_rma = true; + MPI_RGet(remote_source, size, MPI_BYTE, remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &req.next_mpi_request()); + } + + template + buffer_ptr allocate_buffer(const size_t n, node_t source_node) + { + T* ptr; + //int err = + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + MPI_Aint mpi_address; + MPI_Win_attach(rma_win, (void *) &mpi_address, n * sizeof(T)); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_, mpi_address); + } + + template + void free_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + MPI_Win_detach(rma_win, ptr.get()); + free(static_cast(ptr.get())); + } + + static communicator& instance() { return *instance_; } + static node_t this_node() { return instance().this_node_; } + static size_t num_nodes() { return instance().nodes_; } + bool is_host() { return this_node_ == 0; } // TODO(improvement): ham_address == ham_host_address ; } + bool is_host(node_t node) { return node == 0; } // TODO(improvement): node == ham_host_address; } + + static const node_descriptor& get_node_description(node_t node) + { + return instance().node_descriptions[node]; + } + +private: + static communicator* instance_; + node_t this_node_; + size_t nodes_; + node_t host_node_; + std::vector 
node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions + MPI_Win rma_win; // globally shared dynamic window for rma ops + + struct mpi_peer { + buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender + + // needed by sender to manage which buffers are in use and which are free + // just manages indices, that can be used by + detail::resource_pool buffer_pool; + }; + + mpi_peer* peers; +}; + +template +buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } + +template +T& buffer_ptr::operator[](size_t i) +{ + assert(node_ == communicator::this_node()); + return ptr_[i]; +} + +} // namespace net +} // namespace ham + +#endif // ham_net_communicator_mpi_hpp From 6686a91e381aced0791749f3cf2fbb6984bd7409 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 29 Mar 2018 15:05:29 +0200 Subject: [PATCH 013/150] use shared locks for mpi rma windows --- include/ham/net/communicator.hpp | 4 ++- .../ham/net/communicator_mpi_rma_dynamic.hpp | 4 +-- include/ham/offload/offload.hpp | 30 +++++++++++++++---- include/ham/offload/offload_msg.hpp | 3 +- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index 52fe25b..4e84e2b 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -49,8 +49,10 @@ namespace net { #elif defined HAM_COMM_SCIF #define HAM_COMM_ONE_SIDED #include "ham/net/communicator_scif.hpp" +#elif defined HAM_COMM_MPI_RMA_DYNAMIC +#include "ham/net/communicator_scif.hpp" #else -static_assert(false, "Please define either HAM_COMM_MPI, or HAM_COMM_SCIF."); +static_assert(false, "Please define either HAM_COMM_MPI, HAM_COMM_MPI_RMA_DYNAMIC or HAM_COMM_SCIF."); #endif #endif // ham_net_communicator_hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index d1b1add..ebe7a10 100644 --- 
a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -267,7 +267,7 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); MPI_Put(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); MPI_Win_unlock(remote_dest.node(), rma_win); } @@ -279,7 +279,7 @@ class communicator { //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); req.uses_rma = true; - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); MPI_Rput(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &re.next_mpi_request()); } diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index a3cff70..ff41fd7 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -239,6 +239,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." 
<< std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); + return result; #endif } @@ -339,13 +340,30 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) write_result.get(); #endif #ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending - future result(comm.allocate_request(source.node())); - HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) - SEND READ_MSG to source (maybe introduce new copy_msg) - MAKE SURE there is no winlock on dest from host - - comm.send_data_async(result.get_request(), local_source, remote_dest, n); + // use async copy + sync + copy(source, dest, n).get(); +#endif +} #endif + + +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +template +future copy(buffer_ptr source, buffer_ptr dest, size_t n) +{ + net::communicator& comm = runtime::instance().communicator(); + + // make sure there is no winlock on dest from host + // solution: shared lock + + // issues a put on the source node targeting the destination node + future result(comm.allocate_request(source.node())); + HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) + auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); + comm.send_msg(result.get_request(), (void*)©_msg, sizeof write_msg); + comm.recv_result(result.get_request()); + + return result; } #endif diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 845dd08..b16a8a0 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -142,7 +142,7 @@ class offload_read_msg : public active_msg, ExecutionPolicy> { public: - offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr,T* local_source, size_t n) + 
offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr, T* local_source, size_t n) : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } void operator()() //const @@ -163,6 +163,7 @@ class offload_read_msg size_t n; }; //#endif + } // namespace detail } // namespace offload } // namespace ham From e684eb43da8e96d3eee7a9a14230cd44e22ce00b Mon Sep 17 00:00:00 2001 From: Deppisch Date: Thu, 29 Mar 2018 15:05:29 +0200 Subject: [PATCH 014/150] use shared locks for mpi rma windows --- include/ham/net/communicator.hpp | 4 ++- .../ham/net/communicator_mpi_rma_dynamic.hpp | 4 +-- include/ham/offload/offload.hpp | 30 +++++++++++++++---- include/ham/offload/offload_msg.hpp | 3 +- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index 52fe25b..4e84e2b 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -49,8 +49,10 @@ namespace net { #elif defined HAM_COMM_SCIF #define HAM_COMM_ONE_SIDED #include "ham/net/communicator_scif.hpp" +#elif defined HAM_COMM_MPI_RMA_DYNAMIC +#include "ham/net/communicator_scif.hpp" #else -static_assert(false, "Please define either HAM_COMM_MPI, or HAM_COMM_SCIF."); +static_assert(false, "Please define either HAM_COMM_MPI, HAM_COMM_MPI_RMA_DYNAMIC or HAM_COMM_SCIF."); #endif #endif // ham_net_communicator_hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index d1b1add..ebe7a10 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -267,7 +267,7 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + 
MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); MPI_Put(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); MPI_Win_unlock(remote_dest.node(), rma_win); } @@ -279,7 +279,7 @@ class communicator { //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); req.uses_rma = true; - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, remote_dest.node(), 0, rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); MPI_Rput(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &re.next_mpi_request()); } diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index a3cff70..ff41fd7 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -239,6 +239,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." 
<< std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); + return result; #endif } @@ -339,13 +340,30 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) write_result.get(); #endif #ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending - future result(comm.allocate_request(source.node())); - HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) - SEND READ_MSG to source (maybe introduce new copy_msg) - MAKE SURE there is no winlock on dest from host - - comm.send_data_async(result.get_request(), local_source, remote_dest, n); + // use async copy + sync + copy(source, dest, n).get(); +#endif +} #endif + + +#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +template +future copy(buffer_ptr source, buffer_ptr dest, size_t n) +{ + net::communicator& comm = runtime::instance().communicator(); + + // make sure there is no winlock on dest from host + // solution: shared lock + + // issues a put on the source node targeting the destination node + future result(comm.allocate_request(source.node())); + HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) + auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); + comm.send_msg(result.get_request(), (void*)©_msg, sizeof write_msg); + comm.recv_result(result.get_request()); + + return result; } #endif diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 845dd08..b16a8a0 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -142,7 +142,7 @@ class offload_read_msg : public active_msg, ExecutionPolicy> { public: - offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr,T* local_source, size_t n) + 
offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr, T* local_source, size_t n) : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } void operator()() //const @@ -163,6 +163,7 @@ class offload_read_msg size_t n; }; //#endif + } // namespace detail } // namespace offload } // namespace ham From f6c6925f761e0f9cc3374ff0bb765d603deba8c8 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 29 Mar 2018 16:00:14 +0200 Subject: [PATCH 015/150] compile integration of rma dynamic backend --- Jamroot | 48 ++++++++++++++++++-- include/ham/offload/offload.hpp | 27 +++++------ src/ham/net/communicator_mpi_rma_dynamic.cpp | 9 ++++ 3 files changed, 64 insertions(+), 20 deletions(-) create mode 100644 src/ham/net/communicator_mpi_rma_dynamic.cpp diff --git a/Jamroot b/Jamroot index 1d27769..0e06729 100644 --- a/Jamroot +++ b/Jamroot @@ -56,6 +56,13 @@ obj offload_obj_mpi : ham/offload/offload.cpp : /mpi//mpi HAM_C constant OBJ_FILES_MPI : communicator_obj_mpi runtime_obj_mpi offload_obj_mpi communicator_mpi_obj_mpi ; +obj communicator_obj_mpi_rma_dyn : ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; + +constant OBJ_FILES_MPI_RMA_DYN : communicator_obj_mpi_rma_dyn communicator_mpi_rma_dyn_obj_mpi_rma_dyn runtime_obj_mpi_rma_dyn offload_obj_mpi_rma_dyn ; + obj communicator_obj_scif : ham/net/communicator.cpp : scif HAM_COMM_SCIF ; obj communicator_scif_obj_scif : ham/net/communicator_scif.cpp : scif HAM_COMM_SCIF ; obj runtime_obj_scif : ham/offload/runtime.cpp : scif HAM_COMM_SCIF ; @@ -66,6 +73,7 @@ constant OBJ_FILES_SCIF : communicator_obj_scif runtime_obj_scif 
offload_obj_sci # Libraries obj main_obj_mpi : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI ; +obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; obj main_obj_scif : ham/offload/main.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi @@ -73,12 +81,18 @@ lib ham_offload_mpi : /mpi//mpi HAM_COMM_MPI ; +lib ham_offload_mpi_rma_dyn + : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_obj_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; + lib ham_offload_scif : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_obj_scif boost_program_options : scif HAM_COMM_SCIF ; obj main_explicit_obj_mpi : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI ; +obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; obj main_explicit_obj_scif : ham/offload/main_explicit.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi_explicit @@ -86,6 +100,10 @@ lib ham_offload_mpi_explicit : /mpi//mpi HAM_COMM_MPI HAM_EXPLICIT ; +lib ham_offload_mpi_rma_dyn_explicit + : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_explicit_obj_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT + lib ham_offload_scif_explicit : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_explicit_obj_scif boost_program_options : scif HAM_COMM_SCIF HAM_EXPLICIT @@ -99,6 +117,12 @@ exe benchmark_ham_offload_mpi : /mpi//mpi ham_offload_mpi ; +obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +exe benchmark_ham_offload_mpi_rma_dyn + : benchmark_ham_offload_mpi_rma_dyn_obj boost_program_options + : /mpi//mpi ham_offload_mpi_rma_dyn + ; + obj benchmark_ham_offload_scif_obj : benchmark_ham_offload.cpp : scif HAM_COMM_SCIF ; exe benchmark_ham_offload_scif : benchmark_ham_offload_scif_obj boost_program_options ham_offload_scif @@ -121,14 +145,16 @@ exe active_msgs_over_file ; exe ham_offload - : ham_offload.cpp ham_offload_mpi 
boost_program_options - : /mpi//mpi HAM_COMM_MPI + : ham_offload.cpp ham_offload_mpi_rma_dyn boost_program_options +# : /mpi//mpi HAM_COMM_MPI + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC # : scif HAM_COMM_SCIF ; exe ham_offload_explicit - : ham_offload_explicit.cpp ham_offload_mpi_explicit boost_program_options - : /mpi//mpi HAM_COMM_MPI + : ham_offload_explicit.cpp ham_offload_mpi_rma_dyn_explicit boost_program_options +# : /mpi//mpi HAM_COMM_MPI + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC # : scif HAM_COMM_SCIF ; @@ -142,7 +168,11 @@ exe inner_product_mpi : /mpi//mpi HAM_COMM_MPI ; -# +exe inner_product_mpi_rma_dynamic + : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; + exe test_data_transfer_scif : [ obj test_data_transfer_obj : test_data_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options : scif HAM_COMM_SCIF @@ -153,6 +183,10 @@ exe test_data_transfer_mpi : /mpi//mpi HAM_COMM_MPI ; +exe test_data_transfer_mpi_rma_dynamic + : [ obj test_data_transfer_obj : test_data_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; exe test_argument_transfer_scif : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options @@ -164,6 +198,10 @@ exe test_argument_transfer_mpi : /mpi//mpi HAM_COMM_MPI ; +exe test_argument_transfer_mpi_rma_dynamic + : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; # Explicit targets (not built by default) explicit benchmark_intel_leo ; diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index ff41fd7..f721597 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -223,7 
+223,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) // TODO(improvement): create a data transfer thread for one-sided comm.send_data(local_source, remote_dest, n); // sync return future(true); // return dummy future -#else +#elif defined HAM_COMM_MPI // allocate a request and construct a future future result(comm.allocate_request(remote_dest.node())); // generate an offload message @@ -234,8 +234,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the msgs result // async return result; -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending +#elif HAM_COMM_MPI_RMA_DYNAMIC future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." << std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); @@ -262,7 +261,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) // TODO(improvement): create a data transfer thread for one-sided comm.recv_data(remote_source, local_dest, n); // sync return future(true); // return dummy future -#else +#elif defined HAM_COMM_MPI // allocate a request and construct a future future result(comm.allocate_request(remote_source.node())); // generate an offload message @@ -273,8 +272,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the result return result; -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending +#elif defined HAM_COMM_MPI_RMA_DYNAMIC future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." 
<< std::endl; ) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); @@ -320,7 +318,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); // static_assert(false, "copy is not implemented yet for the SCIF back-end"); -#else +#elif defined HAM_COMM_MPI // send corresponding write and read messages to the sender and the receiver // issues a send operation on the source node, that sends the memory at source to the destination node @@ -338,8 +336,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // synchronise read_result.get(); write_result.get(); -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +#elif defined HAM_COMM_MPI_RMA_DYNAMIC // use async copy + sync copy(source, dest, n).get(); #endif @@ -347,23 +344,23 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) #endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +#ifdef HAM_COMM_MPI_RMA_DYNAMIC // compile-integration pending template future copy(buffer_ptr source, buffer_ptr dest, size_t n) { net::communicator& comm = runtime::instance().communicator(); // make sure there is no winlock on dest from host - // solution: shared lock + // solution: shared lock, unlocking from host not necessary // issues a put on the source node targeting the destination node future result(comm.allocate_request(source.node())); HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) - auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); - comm.send_msg(result.get_request(), (void*)©_msg, sizeof write_msg); - comm.recv_result(result.get_request()); + auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); + comm.send_msg(result.get_request(), 
(void*)©_msg, sizeof write_msg); + comm.recv_result(result.get_request()); - return result; + return result; } #endif diff --git a/src/ham/net/communicator_mpi_rma_dynamic.cpp b/src/ham/net/communicator_mpi_rma_dynamic.cpp new file mode 100644 index 0000000..e4e5dbd --- /dev/null +++ b/src/ham/net/communicator_mpi_rma_dynamic.cpp @@ -0,0 +1,9 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/net/communicator.hpp" + +ham::net::communicator* ham::net::communicator::instance_ = nullptr; + From 6c15d13655d8526e11a835b0d5d1d70aaa0f6b00 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 29 Mar 2018 16:00:14 +0200 Subject: [PATCH 016/150] compile integration of rma dynamic backend --- Jamroot | 48 ++++++++++++++++++-- include/ham/offload/offload.hpp | 27 +++++------ src/ham/net/communicator_mpi_rma_dynamic.cpp | 9 ++++ 3 files changed, 64 insertions(+), 20 deletions(-) create mode 100644 src/ham/net/communicator_mpi_rma_dynamic.cpp diff --git a/Jamroot b/Jamroot index 1d27769..0e06729 100644 --- a/Jamroot +++ b/Jamroot @@ -56,6 +56,13 @@ obj offload_obj_mpi : ham/offload/offload.cpp : /mpi//mpi HAM_C constant OBJ_FILES_MPI : communicator_obj_mpi runtime_obj_mpi offload_obj_mpi communicator_mpi_obj_mpi ; +obj communicator_obj_mpi_rma_dyn : ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; + +constant OBJ_FILES_MPI_RMA_DYN : communicator_obj_mpi_rma_dyn communicator_mpi_rma_dyn_obj_mpi_rma_dyn runtime_obj_mpi_rma_dyn offload_obj_mpi_rma_dyn ; + obj 
communicator_obj_scif : ham/net/communicator.cpp : scif HAM_COMM_SCIF ; obj communicator_scif_obj_scif : ham/net/communicator_scif.cpp : scif HAM_COMM_SCIF ; obj runtime_obj_scif : ham/offload/runtime.cpp : scif HAM_COMM_SCIF ; @@ -66,6 +73,7 @@ constant OBJ_FILES_SCIF : communicator_obj_scif runtime_obj_scif offload_obj_sci # Libraries obj main_obj_mpi : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI ; +obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; obj main_obj_scif : ham/offload/main.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi @@ -73,12 +81,18 @@ lib ham_offload_mpi : /mpi//mpi HAM_COMM_MPI ; +lib ham_offload_mpi_rma_dyn + : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_obj_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; + lib ham_offload_scif : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_obj_scif boost_program_options : scif HAM_COMM_SCIF ; obj main_explicit_obj_mpi : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI ; +obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; obj main_explicit_obj_scif : ham/offload/main_explicit.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi_explicit @@ -86,6 +100,10 @@ lib ham_offload_mpi_explicit : /mpi//mpi HAM_COMM_MPI HAM_EXPLICIT ; +lib ham_offload_mpi_rma_dyn_explicit + : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_explicit_obj_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT + lib ham_offload_scif_explicit : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_explicit_obj_scif boost_program_options : scif HAM_COMM_SCIF HAM_EXPLICIT @@ -99,6 +117,12 @@ exe benchmark_ham_offload_mpi : /mpi//mpi ham_offload_mpi ; +obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +exe benchmark_ham_offload_mpi_rma_dyn + : benchmark_ham_offload_mpi_rma_dyn_obj boost_program_options + : /mpi//mpi ham_offload_mpi_rma_dyn + ; 
+ obj benchmark_ham_offload_scif_obj : benchmark_ham_offload.cpp : scif HAM_COMM_SCIF ; exe benchmark_ham_offload_scif : benchmark_ham_offload_scif_obj boost_program_options ham_offload_scif @@ -121,14 +145,16 @@ exe active_msgs_over_file ; exe ham_offload - : ham_offload.cpp ham_offload_mpi boost_program_options - : /mpi//mpi HAM_COMM_MPI + : ham_offload.cpp ham_offload_mpi_rma_dyn boost_program_options +# : /mpi//mpi HAM_COMM_MPI + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC # : scif HAM_COMM_SCIF ; exe ham_offload_explicit - : ham_offload_explicit.cpp ham_offload_mpi_explicit boost_program_options - : /mpi//mpi HAM_COMM_MPI + : ham_offload_explicit.cpp ham_offload_mpi_rma_dyn_explicit boost_program_options +# : /mpi//mpi HAM_COMM_MPI + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC # : scif HAM_COMM_SCIF ; @@ -142,7 +168,11 @@ exe inner_product_mpi : /mpi//mpi HAM_COMM_MPI ; -# +exe inner_product_mpi_rma_dynamic + : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; + exe test_data_transfer_scif : [ obj test_data_transfer_obj : test_data_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options : scif HAM_COMM_SCIF @@ -153,6 +183,10 @@ exe test_data_transfer_mpi : /mpi//mpi HAM_COMM_MPI ; +exe test_data_transfer_mpi_rma_dynamic + : [ obj test_data_transfer_obj : test_data_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + ; exe test_argument_transfer_scif : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options @@ -164,6 +198,10 @@ exe test_argument_transfer_mpi : /mpi//mpi HAM_COMM_MPI ; +exe test_argument_transfer_mpi_rma_dynamic + : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi 
HAM_COMM_MPI_RMA_DYNAMIC + ; # Explicit targets (not built by default) explicit benchmark_intel_leo ; diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index ff41fd7..f721597 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -223,7 +223,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) // TODO(improvement): create a data transfer thread for one-sided comm.send_data(local_source, remote_dest, n); // sync return future(true); // return dummy future -#else +#elif defined HAM_COMM_MPI // allocate a request and construct a future future result(comm.allocate_request(remote_dest.node())); // generate an offload message @@ -234,8 +234,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the msgs result // async return result; -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending +#elif HAM_COMM_MPI_RMA_DYNAMIC future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." 
<< std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); @@ -262,7 +261,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) // TODO(improvement): create a data transfer thread for one-sided comm.recv_data(remote_source, local_dest, n); // sync return future(true); // return dummy future -#else +#elif defined HAM_COMM_MPI // allocate a request and construct a future future result(comm.allocate_request(remote_source.node())); // generate an offload message @@ -273,8 +272,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the result return result; -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-time integration pending +#elif defined HAM_COMM_MPI_RMA_DYNAMIC future result(comm.allocate_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." << std::endl; ) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); @@ -320,7 +318,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); // static_assert(false, "copy is not implemented yet for the SCIF back-end"); -#else +#elif defined HAM_COMM_MPI // send corresponding write and read messages to the sender and the receiver // issues a send operation on the source node, that sends the memory at source to the destination node @@ -338,8 +336,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // synchronise read_result.get(); write_result.get(); -#endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +#elif defined HAM_COMM_MPI_RMA_DYNAMIC // use async copy + sync copy(source, dest, n).get(); #endif @@ -347,23 +344,23 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) #endif -#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +#ifdef HAM_COMM_MPI_RMA_DYNAMIC // compile-integration pending 
template future copy(buffer_ptr source, buffer_ptr dest, size_t n) { net::communicator& comm = runtime::instance().communicator(); // make sure there is no winlock on dest from host - // solution: shared lock + // solution: shared lock, unlocking from host not necessary // issues a put on the source node targeting the destination node future result(comm.allocate_request(source.node())); HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) - auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); - comm.send_msg(result.get_request(), (void*)&copy_msg, sizeof write_msg); - comm.recv_result(result.get_request()); + auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); + comm.send_msg(result.get_request(), (void*)&copy_msg, sizeof write_msg); + comm.recv_result(result.get_request()); - return result; + return result; } #endif diff --git a/src/ham/net/communicator_mpi_rma_dynamic.cpp b/src/ham/net/communicator_mpi_rma_dynamic.cpp new file mode 100644 index 0000000..e4e5dbd --- /dev/null +++ b/src/ham/net/communicator_mpi_rma_dynamic.cpp @@ -0,0 +1,9 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/net/communicator.hpp" + +ham::net::communicator* ham::net::communicator::instance_ = nullptr; + From cc24b4f647b1685c88e6159ddb8f3bbbb4a35f86 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 4 Apr 2018 14:45:08 +0200 Subject: [PATCH 017/150] bugfixes --- Jamroot | 99 ++++++++++--------- include/ham/net/communicator.hpp | 2 +- .../ham/net/communicator_mpi_rma_dynamic.hpp | 53 +++++++--- include/ham/offload/offload.hpp | 44 +++++---- include/ham/offload/offload_msg.hpp | 2 +- src/inner_product.cpp | 33 +++++-- src/test_data_transfer.cpp | 38 ++++++- tools/install_boost.sh | 2 +- 8 files changed, 175 insertions(+), 98 deletions(-) diff --git a/Jamroot b/Jamroot index 0e06729..ccccfe3 100644 --- a/Jamroot +++ b/Jamroot @@ -24,7 +24,7 @@ rule get-boost-lib-path ( properties * ) } lib boost_program_options : : boost_program_options @get-boost-lib-path ; -lib scif : : scif ; +# lib scif : : scif ; project HAM : source-location $(SRC) @@ -36,7 +36,7 @@ project HAM # intel:"-static-intel" on # off, on, full #speed # off, speed, space - "-std=c++11" + "-hstd=c++11" multi # static : default-build debug release debug_mic release_mic @@ -56,25 +56,25 @@ obj offload_obj_mpi : ham/offload/offload.cpp : /mpi//mpi HAM_C constant OBJ_FILES_MPI : communicator_obj_mpi runtime_obj_mpi offload_obj_mpi communicator_mpi_obj_mpi ; -obj communicator_obj_mpi_rma_dyn : ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; -obj communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; -obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; -obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj communicator_obj_mpi_rma_dyn : ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj 
communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; constant OBJ_FILES_MPI_RMA_DYN : communicator_obj_mpi_rma_dyn communicator_mpi_rma_dyn_obj_mpi_rma_dyn runtime_obj_mpi_rma_dyn offload_obj_mpi_rma_dyn ; -obj communicator_obj_scif : ham/net/communicator.cpp : scif HAM_COMM_SCIF ; -obj communicator_scif_obj_scif : ham/net/communicator_scif.cpp : scif HAM_COMM_SCIF ; -obj runtime_obj_scif : ham/offload/runtime.cpp : scif HAM_COMM_SCIF ; -obj offload_obj_scif : ham/offload/offload.cpp : scif HAM_COMM_SCIF ; +# obj communicator_obj_scif : ham/net/communicator.cpp : scif HAM_COMM_SCIF ; +# obj communicator_scif_obj_scif : ham/net/communicator_scif.cpp : scif HAM_COMM_SCIF ; +# obj runtime_obj_scif : ham/offload/runtime.cpp : scif HAM_COMM_SCIF ; +# obj offload_obj_scif : ham/offload/offload.cpp : scif HAM_COMM_SCIF ; -constant OBJ_FILES_SCIF : communicator_obj_scif runtime_obj_scif offload_obj_scif communicator_scif_obj_scif ; +# constant OBJ_FILES_SCIF : communicator_obj_scif runtime_obj_scif offload_obj_scif communicator_scif_obj_scif ; # Libraries obj main_obj_mpi : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI ; -obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; -obj main_obj_scif : ham/offload/main.cpp : scif HAM_COMM_SCIF ; +obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +# obj main_obj_scif : ham/offload/main.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI) main_obj_mpi boost_program_options @@ -83,17 +83,17 @@ lib ham_offload_mpi lib ham_offload_mpi_rma_dyn : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_obj_mpi_rma_dyn 
boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -lib ham_offload_scif - : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_obj_scif boost_program_options - : scif HAM_COMM_SCIF - ; +# lib ham_offload_scif +# : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_obj_scif boost_program_options +# : scif HAM_COMM_SCIF +# ; obj main_explicit_obj_mpi : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI ; -obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; -obj main_explicit_obj_scif : ham/offload/main_explicit.cpp : scif HAM_COMM_SCIF ; +obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +# obj main_explicit_obj_scif : ham/offload/main_explicit.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi_explicit : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI) main_explicit_obj_mpi boost_program_options @@ -102,12 +102,13 @@ lib ham_offload_mpi_explicit lib ham_offload_mpi_rma_dyn_explicit : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_explicit_obj_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT HAM_DEBUG_ON + ; -lib ham_offload_scif_explicit - : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_explicit_obj_scif boost_program_options - : scif HAM_COMM_SCIF HAM_EXPLICIT - ; +# lib ham_offload_scif_explicit +# : $(OBJ_FILES_COMMON) $(OBJ_FILES_SCIF) main_explicit_obj_scif boost_program_options +# : scif HAM_COMM_SCIF HAM_EXPLICIT +# ; # Benchmarks @@ -117,17 +118,17 @@ exe benchmark_ham_offload_mpi : /mpi//mpi ham_offload_mpi ; -obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; exe benchmark_ham_offload_mpi_rma_dyn : benchmark_ham_offload_mpi_rma_dyn_obj 
boost_program_options - : /mpi//mpi ham_offload_mpi_rma_dyn + : /mpi//mpi ham_offload_mpi_rma_dyn HAM_DEBUG_ON ; -obj benchmark_ham_offload_scif_obj : benchmark_ham_offload.cpp : scif HAM_COMM_SCIF ; -exe benchmark_ham_offload_scif - : benchmark_ham_offload_scif_obj boost_program_options ham_offload_scif - : scif - ; +# obj benchmark_ham_offload_scif_obj : benchmark_ham_offload.cpp : scif HAM_COMM_SCIF ; +# exe benchmark_ham_offload_scif +# : benchmark_ham_offload_scif_obj boost_program_options ham_offload_scif +# : scif +# ; exe benchmark_intel_leo : benchmark_intel_leo.cpp boost_program_options @@ -154,14 +155,14 @@ exe ham_offload exe ham_offload_explicit : ham_offload_explicit.cpp ham_offload_mpi_rma_dyn_explicit boost_program_options # : /mpi//mpi HAM_COMM_MPI - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON # : scif HAM_COMM_SCIF ; -exe inner_product_scif - : [ obj inner_product_obj : inner_product.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options - : scif HAM_COMM_SCIF - ; +# exe inner_product_scif +# : [ obj inner_product_obj : inner_product.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options +# : scif HAM_COMM_SCIF +# ; exe inner_product_mpi : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi boost_program_options @@ -170,13 +171,13 @@ exe inner_product_mpi exe inner_product_mpi_rma_dynamic : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -exe test_data_transfer_scif - : [ obj test_data_transfer_obj : test_data_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options - : scif HAM_COMM_SCIF - ; +# exe test_data_transfer_scif +# : [ obj test_data_transfer_obj : test_data_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options +# : scif HAM_COMM_SCIF +# ; 
exe test_data_transfer_mpi : [ obj test_data_transfer_obj : test_data_transfer.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi boost_program_options @@ -185,13 +186,13 @@ exe test_data_transfer_mpi exe test_data_transfer_mpi_rma_dynamic : [ obj test_data_transfer_obj : test_data_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -exe test_argument_transfer_scif - : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options - : scif HAM_COMM_SCIF - ; +# exe test_argument_transfer_scif +# : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : scif HAM_COMM_SCIF ] ham_offload_scif boost_program_options +# : scif HAM_COMM_SCIF +# ; exe test_argument_transfer_mpi : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi boost_program_options @@ -200,9 +201,9 @@ exe test_argument_transfer_mpi exe test_argument_transfer_mpi_rma_dynamic : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; # Explicit targets (not built by default) explicit benchmark_intel_leo ; -explicit test_data_transfer_scif ; +# explicit test_data_transfer_scif ; diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index 4e84e2b..c754f99 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -50,7 +50,7 @@ namespace net { #define HAM_COMM_ONE_SIDED #include "ham/net/communicator_scif.hpp" #elif defined HAM_COMM_MPI_RMA_DYNAMIC -#include "ham/net/communicator_scif.hpp" +#include "ham/net/communicator_mpi_rma_dynamic.hpp" #else static_assert(false, "Please define either HAM_COMM_MPI, 
HAM_COMM_MPI_RMA_DYNAMIC or HAM_COMM_SCIF."); #endif diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index ebe7a10..3e28440 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -70,7 +70,7 @@ class communicator { request() : valid_(false) {} // instantiate invalid request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) - : target_node(target_node), source_node(source_node), valid_(true), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0) + : target_node(target_node), source_node(source_node), valid_(true), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0), uses_rma_(false) {} // return true if request was finished @@ -80,7 +80,7 @@ class communicator { int flag = 0; MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received - if(uses_rma) + if(uses_rma_) { HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) } @@ -93,9 +93,9 @@ class communicator { HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) - if(uses_rma) + if(uses_rma_) { - MPI_Win_unlock(target_node, rma_win); + MPI_Win_unlock(target_node, communicator::instance().rma_win); } return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); } @@ -130,7 +130,7 @@ class communicator { node_t target_node; node_t source_node; bool valid_; - bool uses_rma; + bool uses_rma_; // only needed by the sender enum { NUM_REQUESTS = 3 }; @@ -198,7 +198,7 @@ 
class communicator { if (is_host()) { for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable // allocate buffers - peers[i].msg_buffers = allocate_buffer(constants::MSG_BUFFERS, this_node_); + peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); // fill resource pools for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { peers[i].buffer_pool.add(j-1); @@ -268,7 +268,7 @@ class communicator { { //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); - MPI_Put(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win); MPI_Win_unlock(remote_dest.node(), rma_win); } @@ -277,10 +277,10 @@ class communicator { void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); - req.uses_rma = true; + req.uses_rma_ = true; MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); - MPI_Rput(local_source, size, MPI_BYTE, remote_dest.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &re.next_mpi_request()); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win, &req.next_mpi_request()); } @@ -288,8 +288,9 @@ class communicator { void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { //MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Get(remote_source, size, MPI_BYTE, 
remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win); - MPI_Win_flush(remote_source.node(), rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, rma_win); + MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win); + MPI_Win_unlock(remote_source.node(), rma_win); } // to be used by the host @@ -297,8 +298,9 @@ class communicator { void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { //MPI_Irecv(static_cast(local_dest), size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); - req.uses_rma = true; - MPI_RGet(remote_source, size, MPI_BYTE, remote_source.node(), (void *) remote_dest.get_mpi_address(), size, MPI_BYTE, rma_win, &req.next_mpi_request()); + req.uses_rma_ = true; + MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, rma_win); + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win, &req.next_mpi_request()); } template @@ -307,12 +309,25 @@ class communicator { T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - MPI_Aint mpi_address; - MPI_Win_attach(rma_win, (void *) &mpi_address, n * sizeof(T)); + MPI_Win_attach(rma_win, (void*)ptr, n * sizeof(T)); + MPI_Aint mpi_address; + MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_, mpi_address); } + // for host to allocate peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers + template + buffer_ptr allocate_peer_buffer(const size_t n, node_t source_node) + { + T* ptr; + //int err = + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_); 
+ } + + // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers template void free_buffer(buffer_ptr ptr) { @@ -322,6 +337,14 @@ class communicator { free(static_cast(ptr.get())); } + template + void free_peer_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + free(static_cast(ptr.get())); + } + static communicator& instance() { return *instance_; } static node_t this_node() { return instance().this_node_; } static size_t num_nodes() { return instance().nodes_; } diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index f721597..b44451e 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -273,9 +273,10 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) return result; #elif defined HAM_COMM_MPI_RMA_DYNAMIC - future result(comm.allocate_request(remote_dest.node())); + future result(comm.allocate_request(remote_source.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." 
<< std::endl; ) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); + return result; #endif } @@ -308,8 +309,28 @@ void get_sync(buffer_ptr remote_source, T* local_dest, size_t n) //} +#ifdef HAM_COMM_MPI_RMA_DYNAMIC // compile-integration pending + template +future copy(buffer_ptr source, buffer_ptr dest, size_t n) +{ + net::communicator& comm = runtime::instance().communicator(); + + // make sure there is no winlock on dest from host + // solution: shared lock, unlocking from host not necessary + + // issues a put on the source node targeting the destination node + future result(comm.allocate_request(source.node())); + HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) + auto copy_msg = detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); + comm.send_msg(result.get_request(), (void*)&copy_msg, sizeof copy_msg); + comm.recv_result(result.get_request()); + + return result; +} +#endif + #ifndef HAM_COMM_ONE_SIDED // TODO(feature, high priority): implement -template + template void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) { net::communicator& comm = runtime::instance().communicator(); @@ -341,27 +362,8 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) copy(source, dest, n).get(); #endif } -#endif - - -#ifdef HAM_COMM_MPI_RMA_DYNAMIC // compile-integration pending -template -future copy(buffer_ptr source, buffer_ptr dest, size_t n) -{ - net::communicator& comm = runtime::instance().communicator(); - // make sure there is no winlock on dest from host - // solution: shared lock, unlocking from host not necessary - - // issues a put on the source node targeting the destination node - future result(comm.allocate_request(source.node())); - HAM_DEBUG( HAM_LOG << "offload::copy_sync(): initiating copy between " << source.node() << " and " << dest.node() << std::endl; ) - auto copy_msg = 
detail::offload_rma_copy_msg(result.get_request(), dest.node(), dest.get_mpi_address(), source.get(), n); - comm.send_msg(result.get_request(), (void*)&copy_msg, sizeof write_msg); - comm.recv_result(result.get_request()); - return result; -} #endif // TODO(feature): new API elements diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index b16a8a0..90e0fee 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -147,7 +147,7 @@ class offload_read_msg void operator()() //const { - communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. + communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // send a result message to tell the sender, that the transfer is done if (req.valid()) { diff --git a/src/inner_product.cpp b/src/inner_product.cpp index 3dc1c60..a988abc 100644 --- a/src/inner_product.cpp +++ b/src/inner_product.cpp @@ -17,6 +17,15 @@ double inner_product(offload::buffer_ptr x, offload::buffer_ptr return z; } +bool print_buffer_content(offload::buffer_ptr x, size_t n) +{ + std::cout << "printing data on node " << x.node() << std::endl; + for (size_t i = 0; i < n; ++i) + std::cout << x[i] << " "; + std::cout << std::endl; + return true; +} + int main(int argc, char* argv[]) { // buffer size @@ -40,20 +49,26 @@ int main(int argc, char* argv[]) // allocate device memory (returns a buffer_ptr) auto a_target = offload::allocate(target, n); - auto b_target = offload::allocate(target, n); - + std::cout << "allocated remote buffer 1" << std::endl; + //auto b_target = offload::allocate(target, n); + //std::cout << "allocated remote buffer 2" << std::endl; + + // transfer data to the device (the target is implicitly specified by the destination buffer_ptr) - auto 
future_a_put = offload::put(a.data(), a_target, n); // async - offload::put(b.data(), b_target, n); // sync (implicitly returned future performs synchronisation in dtor), alternative: put_sync() + //auto future_a_put = offload::put(a.data(), a_target, n); // async + offload::put(a.data(), a_target, n); // sync + //offload::put(b.data(), b_target, n); // sync (implicitly returned future performs synchronisation in dtor), alternative: put_sync() // synchronise - future_a_put.get(); - + //future_a_put.get(); + + std::cout << "completed put" << std::endl; + // asynchronously offload the call to inner_product - auto c_future = offload::async(target, f2f(&inner_product, a_target, b_target, n)); + //auto c_future = offload::async(target, f2f(&inner_product, a_target, b_target, n)); // synchronise on the result - double c = c_future.get(); + //double c = c_future.get(); // we also could have used: // double c = offload::async(...).get(); @@ -62,7 +77,7 @@ int main(int argc, char* argv[]) // offload.async(...); // output the result - std::cout << "Result: " << c << std::endl; + //std::cout << "Result: " << c << std::endl; return 0; } diff --git a/src/test_data_transfer.cpp b/src/test_data_transfer.cpp index d53eb3e..a58569c 100644 --- a/src/test_data_transfer.cpp +++ b/src/test_data_transfer.cpp @@ -27,6 +27,15 @@ bool compare(const std::vector& a, const std::vector& b) return std::equal(a.begin(), a.end(), b.begin()); } +double print_buffer_content(offload::buffer_ptr x, size_t n) +{ + std::cout << "printing data on node " << x.node() << std::endl; + for (size_t i = 0; i < n; ++i) + std::cout << x[i] << " "; + std::cout << std::endl; + return 50.0; +} + int main(int argc, char* argv[]) { std::cout << "Testing data transfer: host -> target_a -> target_b -> host." 
<< std::endl; @@ -48,12 +57,39 @@ int main(int argc, char* argv[]) // allocate device memory (returns a buffer_ptr) auto target_buffer_a = offload::allocate(target_a, n); auto target_buffer_b = offload::allocate(target_b, n); + + offload::sync(target_a, f2f(&print_buffer_content, target_buffer_a, n)); + + std::cout << "a - get: " << target_buffer_a.get() << std::endl; + std::cout << "a - node: " << target_buffer_a.node() << std::endl; + +#ifdef HAM_COMM_MPI_RMA_DYNAMIC + std::cout << "a - mpi: " << target_buffer_a.get_mpi_address() << std::endl; +#endif + + std::cout << "b - get: " << target_buffer_b.get() << std::endl; + std::cout << "b - node: " << target_buffer_b.node() << std::endl; + +#ifdef HAM_COMM_MPI_RMA_DYNAMIC + std::cout << "b - mpi: " << target_buffer_b.get_mpi_address() << std::endl; +#endif // host -> target_a -> target_b -> host + std::cout << "put to target_a: "; offload::put(write_buffer.data(), target_buffer_a, n); + std::cout << "done" << std::endl; + + offload::sync(target_a, f2f(&print_buffer_content, target_buffer_a, n)); + + std::cout << "copy from target_a to target_b: "; offload::copy_sync(target_buffer_a, target_buffer_b, n); + std::cout << "done" << std::endl; + + offload::async(target_b, f2f(&print_buffer_content, target_buffer_b, n)); + + std::cout << "get from target_b: "; offload::get(target_buffer_b, read_buffer.data(), n); - + std::cout << "done" << std::endl; // verify bool passed = compare(write_buffer, read_buffer); diff --git a/tools/install_boost.sh b/tools/install_boost.sh index 4f30ddf..9b91667 100755 --- a/tools/install_boost.sh +++ b/tools/install_boost.sh @@ -35,7 +35,7 @@ DOWNLOAD_PATH=$HOME/boost/ INSTALL_PATH=$HOME/software -NO_MIC=false # set to true, to disable building Boost for Xeon Phi +NO_MIC=true # set to true, to disable building Boost for Xeon Phi BASHRC_FILE=$HOME/.bashrc # set to /dev/null to disable, or to any other file to manually merge the needed changes into your .bashrc BOOST_BUILD_OPTIONS="-j8" # 
concurrent build with up to 8 commands From a29c877796276cd7a186714631eaddcd81ef61de Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 5 Apr 2018 12:45:23 +0200 Subject: [PATCH 018/150] segfault fixes --- Jamroot | 4 ++-- src/inner_product.cpp | 21 ++++++++++----------- src/test_data_transfer.cpp | 13 +++++++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Jamroot b/Jamroot index ccccfe3..4fb6664 100644 --- a/Jamroot +++ b/Jamroot @@ -170,8 +170,8 @@ exe inner_product_mpi ; exe inner_product_mpi_rma_dynamic - : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options + : /mpi//mpi -g HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; # exe test_data_transfer_scif diff --git a/src/inner_product.cpp b/src/inner_product.cpp index a988abc..f959eee 100644 --- a/src/inner_product.cpp +++ b/src/inner_product.cpp @@ -50,25 +50,24 @@ int main(int argc, char* argv[]) // allocate device memory (returns a buffer_ptr) auto a_target = offload::allocate(target, n); std::cout << "allocated remote buffer 1" << std::endl; - //auto b_target = offload::allocate(target, n); - //std::cout << "allocated remote buffer 2" << std::endl; + auto b_target = offload::allocate(target, n); + std::cout << "allocated remote buffer 2" << std::endl; // transfer data to the device (the target is implicitly specified by the destination buffer_ptr) - //auto future_a_put = offload::put(a.data(), a_target, n); // async - offload::put(a.data(), a_target, n); // sync - //offload::put(b.data(), b_target, n); // sync (implicitly returned future performs synchronisation in dtor), alternative: put_sync() - - // synchronise - //future_a_put.get(); + auto future_a_put = offload::put(a.data(), a_target, n); // async + offload::put(b.data(), b_target, n); // 
sync (implicitly returned future performs synchronisation in dtor), alternative: put_sync() + + // synchronise + future_a_put.get(); std::cout << "completed put" << std::endl; // asynchronously offload the call to inner_product - //auto c_future = offload::async(target, f2f(&inner_product, a_target, b_target, n)); + auto c_future = offload::async(target, f2f(&inner_product, a_target, b_target, n)); // synchronise on the result - //double c = c_future.get(); + double c = c_future.get(); // we also could have used: // double c = offload::async(...).get(); @@ -77,7 +76,7 @@ int main(int argc, char* argv[]) // offload.async(...); // output the result - //std::cout << "Result: " << c << std::endl; + std::cout << "Result: " << c << std::endl; return 0; } diff --git a/src/test_data_transfer.cpp b/src/test_data_transfer.cpp index a58569c..cb8c60d 100644 --- a/src/test_data_transfer.cpp +++ b/src/test_data_transfer.cpp @@ -76,15 +76,16 @@ int main(int argc, char* argv[]) // host -> target_a -> target_b -> host std::cout << "put to target_a: "; - offload::put(write_buffer.data(), target_buffer_a, n); - std::cout << "done" << std::endl; + auto put_future = offload::put(write_buffer.data(), target_buffer_a, n); + put_future.get(); + std::cout << "done" << std::endl; - offload::sync(target_a, f2f(&print_buffer_content, target_buffer_a, n)); - std::cout << "copy from target_a to target_b: "; - offload::copy_sync(target_buffer_a, target_buffer_b, n); - std::cout << "done" << std::endl; + offload::sync(target_a, f2f(&print_buffer_content, target_buffer_a, n)); + std::cout << "copy from target_a to target_b: "; + offload::copy_sync(target_buffer_a, target_buffer_b, n); + std::cout << "done" << std::endl; offload::async(target_b, f2f(&print_buffer_content, target_buffer_b, n)); std::cout << "get from target_b: "; From 2a3dc90ce91d1ee35e22c5a31a05df8390ea25f3 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 5 Apr 2018 18:38:28 +0200 Subject: [PATCH 019/150] started implementing 
pairwise dynamic windows --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 117 ++++++++++++++---- include/ham/offload/offload_msg.hpp | 33 ++++- src/inner_product.cpp | 7 +- 3 files changed, 130 insertions(+), 27 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 3e28440..1ef1320 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -165,7 +165,6 @@ class communicator { MPI_Comm_size(MPI_COMM_WORLD, &t); nodes_ = t; host_node_ = 0; // TODO(improvement): make configureable, like for SCIF - MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &rma_win); HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI done" << std::endl; ) @@ -194,8 +193,12 @@ class communicator { //MPI_Alltoall(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) - + + // prepare global group to create pairwise groups + MPI_Comm_group(MPI_COMM_WORLD, &global_group); + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable // allocate buffers peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); @@ -203,7 +206,30 @@ class communicator { for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { peers[i].buffer_pool.add(j-1); } + + // init comm to target from pairwise subgroups + const int members[2] = {host_node_, i}; // NOTE: this implies new group rank is 0 for host, 1 for target + MPI_Group pairwise_group; + MPI_Group_incl(global_group, 2, members, &pairwise_group); + MPI_Comm_create_group(MPI_COMM_WORLD, 
pairwise_group, 0, &(peers[i].rma_comm)); + MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + + // init win to target + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_win)); } + + + } else { + // init comm to host from pairwise subgroup + const int members[2] = {host_node_, this_node_}; // NOTE: this implies new group rank = 0 for host, 1 for target + MPI_Group pairwise_group; + MPI_Group_incl(global_group, 2, members, &pairwise_group); // should match the corresponding subgroup on host for i = this_node_ + MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[host_node_].rma_comm)); + MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + + // init win to host + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[host_node_].rma_comm, &(peers[host_node_].rma_win)); + } } @@ -250,7 +276,7 @@ class communicator { void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) { static msg_buffer buffer; // NOTE ! 
- MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(&buffer, size, MPI_BYTE, MPI_ANY_SOURCE, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // changed source from host_node_ to MPI_ANY_SOURCE so targets may react to request for setting up rma paths return static_cast(&buffer); } @@ -263,44 +289,57 @@ class communicator { return; } + // in MPI RMA backend only used by copy + // host uses async version + // targets don't send data to host as host uses rma get template void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { - //MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); - MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); - MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win); - MPI_Win_unlock(remote_dest.node(), rma_win); + // resolve rank for subgroup + int target_rank; + if(remote_dest.node() > this_node_) { + target_rank = 1; + } else { + target_rank = 0; + } + // execute transfer + MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, peers[remote_dest.node()].rma_win); + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, target_rank, remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); + MPI_Win_unlock(target_rank, peers[remote_dest.node()].rma_win); } - // to be used by the host + // to be used by the host only template void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { - //MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); req.uses_rma_ = true; - MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, rma_win); - MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * 
sizeof(T), MPI_BYTE, rma_win, &req.next_mpi_request()); + // resolving rank for subgroup not necessary, is always 1 for the target + MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, peers[remote_dest.node()].rma_win); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, 1, remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); } - + // not used in MPI RMA backend + // host uses async version + // targets don't use get + // should be safe to remove template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { - //MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, rma_win); - MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win); - MPI_Win_unlock(remote_source.node(), rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[remote_source.node()].rma_win); // dummy rank number as if targets were to use recv_data from host + MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); + MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); } // to be used by the host template void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { - //MPI_Irecv(static_cast(local_dest), size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); req.uses_rma_ = true; - MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, rma_win); - MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, rma_win, &req.next_mpi_request()); + + // resolving rank for subgroup not necessary, is always 1 for the 
target + MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, peers[remote_source.node()].rma_win); + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, 1, remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); } template @@ -309,7 +348,7 @@ class communicator { T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - MPI_Win_attach(rma_win, (void*)ptr, n * sizeof(T)); + MPI_Win_attach(peers[source_node].rma_win, (void*)ptr, n * sizeof(T)); // only attach to the window corresponding to the requesting node, is attached to potential target-target-windows on demand MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called @@ -333,6 +372,8 @@ class communicator { { assert(ptr.node() == this_node_); // NOTE: no dtor is called + + // remove from all potential rma windows MPI_Win_detach(rma_win, ptr.get()); free(static_cast(ptr.get())); } @@ -356,20 +397,52 @@ class communicator { return instance().node_descriptions[node]; } + // called to check if an rma path between two targets exists, sufficient to call on one of the two targets + bool has_rma_path(node_t target_node) { + // check if copy path exists + return !peers[remote_dest.node()].rma_win; + } + + // called to establish an rma path between two targets for copy operations, needs to be called on both sides + void establish_rma_path(node_t target_node) { + if(!has_rma_path(target_node)) { // make sure there is not already an rma path + const int members[2]; + // NOTE: protocol for target-target sub-ranks is: lower global rank: 0, higher global rank: 1 + // thus rank for existing copy paths can be easily translated by comparing target rank to own rank + if(this_node_ > target_node) { + members[0] = target_node; + members[1] = this_node_; + } else { + members[0] = this_node_; + members[1] = target_node; + } + MPI_Group pairwise_group; + MPI_Group_incl(global_group, 2, members, &pairwise_group); + 
MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[target_node].rma_comm)); + MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[target_node].rma_comm, &(peers[target_node].rma_win)); + } + } + + private: static communicator* instance_; node_t this_node_; size_t nodes_; node_t host_node_; std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions - MPI_Win rma_win; // globally shared dynamic window for rma ops - + MPI_Group global_group; + struct mpi_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // needed by sender to manage which buffers are in use and which are free // just manages indices, that can be used by detail::resource_pool buffer_pool; + + // mpi rma dynamic window + MPI_Win rma_win; + MPI_Comm rma_comm; }; mpi_peer* peers; diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 90e0fee..e9f75b0 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -134,9 +134,7 @@ class offload_read_msg size_t n; }; - -// TODO(daniel, high priority): implement offload_copy_msg, copy with one-sided rma needs a msg containing ptrs for source+target -//#ifdef SOME_COOL_VAR_FOR_MPI_RMA_DYN // compile-integration pending +//#ifdef HAM_COMM_MPI_RMA_DYNAMIC template class ExecutionPolicy = default_execution_policy> class offload_rma_copy_msg : public active_msg, ExecutionPolicy> @@ -147,6 +145,9 @@ class offload_read_msg void operator()() //const { + communicator::instance().establish_rma_path(remote_node); // should quickly return if path already exists + // attach existing buffers to new target window ?!? 
+ communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // send a result message to tell the sender, that the transfer is done @@ -164,6 +165,32 @@ class offload_read_msg }; //#endif +// allows user to setup an rma link between two targets without a copy transfer +#ifdef HAM_COMM_MPI_RMA_DYNAMIC + template class ExecutionPolicy = default_execution_policy> + class setup_rma_path_msg + : public active_msg, ExecutionPolicy> + { + public: + setup_rma_path_msg(node_t remote_node) + : remote_node(remote_node) { } + + void operator()() //const + { + communicator::instance().establish_rma_path(remote_node); + + // send a result message to tell the sender that the path is set up + if (req.valid()) { + req.send_result((void*)&remote_node, sizeof remote_node); + } + } + private: + node_t remote_node; + }; +#endif + +// link buffer msg? to tell target of copy to add the buffer to the soecific window... which might not even exist...fuck + } // namespace detail } // namespace offload } // namespace ham diff --git a/src/inner_product.cpp b/src/inner_product.cpp index f959eee..87b04db 100644 --- a/src/inner_product.cpp +++ b/src/inner_product.cpp @@ -56,10 +56,10 @@ int main(int argc, char* argv[]) // transfer data to the device (the target is implicitly specified by the destination buffer_ptr) auto future_a_put = offload::put(a.data(), a_target, n); // async + future_a_put.get(); offload::put(b.data(), b_target, n); // sync (implicitly returned future performs synchronisation in dtor), alternative: put_sync() // synchronise - future_a_put.get(); std::cout << "completed put" << std::endl; @@ -77,7 +77,10 @@ int main(int argc, char* argv[]) // output the result std::cout << "Result: " << c << std::endl; - + + MPI_Win_create_dynamic() + + return 0; } From c2b18ae0a68f29258eadcc791effb3c6baa568d2 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 6 Apr 2018 15:47:57 +0200 Subject: [PATCH 020/150] changed to paiwise global dynamic 
windows --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 125 +++++++++--------- include/ham/offload/offload.hpp | 2 +- include/ham/offload/offload_msg.hpp | 8 +- src/inner_product.cpp | 3 +- 4 files changed, 70 insertions(+), 68 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 1ef1320..1ec104a 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -95,7 +95,7 @@ class communicator { HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) if(uses_rma_) { - MPI_Win_unlock(target_node, communicator::instance().rma_win); + MPI_Win_unlock(target_node, communicator::instance().peers[target_node].rma_win); } return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); } @@ -194,43 +194,51 @@ class communicator { MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) - // prepare global group to create pairwise groups - MPI_Comm_group(MPI_COMM_WORLD, &global_group); - if (is_host()) { - - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - // allocate buffers - peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - // fill resource pools - for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { - peers[i].buffer_pool.add(j-1); - } - - // init comm to target from pairwise subgroups - const int members[2] = {host_node_, i}; // NOTE: this implies new group rank is 0 for host, 1 for target - MPI_Group pairwise_group; - MPI_Group_incl(global_group, 2, members, &pairwise_group); - MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[i].rma_comm)); - MPI_Group_free(&pairwise_group); // 
no longer needed after COMM is created - - // init win to target - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_win)); - } + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for (size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j - 1); + } + } + } - } else { - // init comm to host from pairwise subgroup - const int members[2] = {host_node_, this_node_}; // NOTE: this implies new group rank = 0 for host, 1 for target - MPI_Group pairwise_group; - MPI_Group_incl(global_group, 2, members, &pairwise_group); // should match the corresponding subgroup on host for i = this_node_ - MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[host_node_].rma_comm)); - MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + // initialise 1 global window per target + for (node_t i = 1; i < nodes_; ++i) { + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); + } - // init win to host - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[host_node_].rma_comm, &(peers[host_node_].rma_win)); + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) +/* pairwise COMM stuff + // both + // prepare global group to create pairwise groups + MPI_Comm_group(MPI_COMM_WORLD, &global_group); + // host + // init comm to target from pairwise subgroups + const int members[2] = {host_node_, i}; // NOTE: this implies new group rank is 0 for host, 1 for target + MPI_Group pairwise_group; + MPI_Group_incl(global_group, 2, members, &pairwise_group); + MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[i].rma_comm)); + MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + + // init win to target + 
MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_win)); + // targets + // init comm to host from pairwise subgroup + const int members[2] = {host_node_, this_node_}; // NOTE: this implies new group rank = 0 for host, 1 for target + MPI_Group pairwise_group; + MPI_Group_incl(global_group, 2, members, &pairwise_group); // should match the corresponding subgroup on host for i = this_node_ + MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[host_node_].rma_comm)); + MPI_Group_free(&pairwise_group); // no longer needed after COMM is created + + // init win to host + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[host_node_].rma_comm, &(peers[host_node_].rma_win)); + */ - } } ~communicator() @@ -276,8 +284,8 @@ class communicator { void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) { static msg_buffer buffer; // NOTE ! - MPI_Recv(&buffer, size, MPI_BYTE, MPI_ANY_SOURCE, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // changed source from host_node_ to MPI_ANY_SOURCE so targets may react to request for setting up rma paths - return static_cast(&buffer); + MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + return static_cast(&buffer); } // trigger receiving the result of a message on the sending side @@ -295,17 +303,10 @@ class communicator { template void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { - // resolve rank for subgroup - int target_rank; - if(remote_dest.node() > this_node_) { - target_rank = 1; - } else { - target_rank = 0; - } // execute transfer - MPI_Win_lock(MPI_LOCK_SHARED, target_rank, 0, peers[remote_dest.node()].rma_win); - MPI_Put(local_source, size * sizeof(T), MPI_BYTE, target_rank, remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - MPI_Win_unlock(target_rank, peers[remote_dest.node()].rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, 
peers[remote_dest.node()].rma_win); + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); + MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); } // to be used by the host only @@ -314,9 +315,8 @@ class communicator { { req.uses_rma_ = true; - // resolving rank for subgroup not necessary, is always 1 for the target - MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, peers[remote_dest.node()].rma_win); - MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, 1, remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); } // not used in MPI RMA backend @@ -326,7 +326,7 @@ class communicator { template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { - MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[remote_source.node()].rma_win); // dummy rank number as if targets were to use recv_data from host + MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); } @@ -337,9 +337,8 @@ class communicator { { req.uses_rma_ = true; - // resolving rank for subgroup not necessary, is always 1 for the target - MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, peers[remote_source.node()].rma_win); - MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, 1, remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, 
&req.next_mpi_request()); + MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); } template @@ -348,7 +347,10 @@ class communicator { T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - MPI_Win_attach(peers[source_node].rma_win, (void*)ptr, n * sizeof(T)); // only attach to the window corresponding to the requesting node, is attached to potential target-target-windows on demand + // attach to all windows + for (node_t i = 1; i < nodes_; ++i) { + MPI_Win_attach(peers[i].rma_win, (void*)ptr, n * sizeof(T)); + } MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called @@ -366,18 +368,19 @@ class communicator { return buffer_ptr(ptr, this_node_); } - // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers template void free_buffer(buffer_ptr ptr) { assert(ptr.node() == this_node_); // NOTE: no dtor is called - - // remove from all potential rma windows - MPI_Win_detach(rma_win, ptr.get()); + // remove from all rma windows + for (node_t i = 1; i < nodes_; ++i) { + MPI_Win_detach(peers[i].rma_win, ptr.get()); + } free(static_cast(ptr.get())); } + // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers template void free_peer_buffer(buffer_ptr ptr) { @@ -397,12 +400,14 @@ class communicator { return instance().node_descriptions[node]; } +/* // called to check if an rma path between two targets exists, sufficient to call on one of the two targets bool has_rma_path(node_t target_node) { // check if copy path exists return !peers[remote_dest.node()].rma_win; } - +*/ +/* // called to 
establish an rma path between two targets for copy operations, needs to be called on both sides void establish_rma_path(node_t target_node) { if(!has_rma_path(target_node)) { // make sure there is not already an rma path @@ -423,7 +428,7 @@ class communicator { MPI_Win_create_dynamic(MPI_INFO_NULL, peers[target_node].rma_comm, &(peers[target_node].rma_win)); } } - +*/ private: static communicator* instance_; @@ -431,7 +436,6 @@ class communicator { size_t nodes_; node_t host_node_; std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions - MPI_Group global_group; struct mpi_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender @@ -442,7 +446,6 @@ class communicator { // mpi rma dynamic window MPI_Win rma_win; - MPI_Comm rma_comm; }; mpi_peer* peers; diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index b44451e..1c2e78c 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -309,7 +309,7 @@ void get_sync(buffer_ptr remote_source, T* local_dest, size_t n) //} -#ifdef HAM_COMM_MPI_RMA_DYNAMIC // compile-integration pending +#ifdef HAM_COMM_MPI_RMA_DYNAMIC template future copy(buffer_ptr source, buffer_ptr dest, size_t n) { diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index e9f75b0..cb8a5a8 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -145,9 +145,9 @@ class offload_read_msg void operator()() //const { - communicator::instance().establish_rma_path(remote_node); // should quickly return if path already exists + /* communicator::instance().establish_rma_path(remote_node); // should quickly return if path already exists // attach existing buffers to new target window ?!? 
- + */ communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); // send a result message to tell the sender, that the transfer is done @@ -165,6 +165,7 @@ class offload_read_msg }; //#endif +/* // allows user to setup an rma link between two targets without a copy transfer #ifdef HAM_COMM_MPI_RMA_DYNAMIC template class ExecutionPolicy = default_execution_policy> @@ -188,8 +189,7 @@ class offload_read_msg node_t remote_node; }; #endif - -// link buffer msg? to tell target of copy to add the buffer to the soecific window... which might not even exist...fuck +*/ } // namespace detail } // namespace offload diff --git a/src/inner_product.cpp b/src/inner_product.cpp index 87b04db..7ad0f18 100644 --- a/src/inner_product.cpp +++ b/src/inner_product.cpp @@ -77,8 +77,7 @@ int main(int argc, char* argv[]) // output the result std::cout << "Result: " << c << std::endl; - - MPI_Win_create_dynamic() + return 0; From b6d15c8e6ae52f694a2a987e5cbcf42149c74d37 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 6 Apr 2018 16:01:49 +0200 Subject: [PATCH 021/150] fixed Jamroot --- Jamroot | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Jamroot b/Jamroot index 4fb6664..6f6dbb2 100644 --- a/Jamroot +++ b/Jamroot @@ -56,10 +56,10 @@ obj offload_obj_mpi : ham/offload/offload.cpp : /mpi//mpi HAM_C constant OBJ_FILES_MPI : communicator_obj_mpi runtime_obj_mpi offload_obj_mpi communicator_mpi_obj_mpi ; -obj communicator_obj_mpi_rma_dyn : ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -obj communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; -obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj communicator_obj_mpi_rma_dyn 
: ham/net/communicator.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj communicator_mpi_rma_dyn_obj_mpi_rma_dyn : ham/net/communicator_mpi_rma_dynamic.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj runtime_obj_mpi_rma_dyn : ham/offload/runtime.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; +obj offload_obj_mpi_rma_dyn : ham/offload/offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; constant OBJ_FILES_MPI_RMA_DYN : communicator_obj_mpi_rma_dyn communicator_mpi_rma_dyn_obj_mpi_rma_dyn runtime_obj_mpi_rma_dyn offload_obj_mpi_rma_dyn ; @@ -73,7 +73,7 @@ constant OBJ_FILES_MPI_RMA_DYN : communicator_obj_mpi_rma_dyn communicator_mpi_r # Libraries obj main_obj_mpi : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI ; -obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj main_obj_mpi_rma_dyn : ham/offload/main.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; # obj main_obj_scif : ham/offload/main.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi @@ -83,7 +83,7 @@ lib ham_offload_mpi lib ham_offload_mpi_rma_dyn : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_obj_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; # lib ham_offload_scif @@ -92,7 +92,7 @@ lib ham_offload_mpi_rma_dyn # ; obj main_explicit_obj_mpi : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI ; -obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj main_explicit_obj_mpi_rma_dyn : ham/offload/main_explicit.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; # obj main_explicit_obj_scif : ham/offload/main_explicit.cpp : scif HAM_COMM_SCIF ; lib ham_offload_mpi_explicit @@ -102,7 +102,7 @@ lib ham_offload_mpi_explicit lib ham_offload_mpi_rma_dyn_explicit : $(OBJ_FILES_COMMON) $(OBJ_FILES_MPI_RMA_DYN) main_explicit_obj_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT HAM_DEBUG_ON + : /mpi//mpi 
HAM_COMM_MPI_RMA_DYNAMIC HAM_EXPLICIT ; # lib ham_offload_scif_explicit @@ -118,10 +118,10 @@ exe benchmark_ham_offload_mpi : /mpi//mpi ham_offload_mpi ; -obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON ; +obj benchmark_ham_offload_mpi_rma_dyn_obj : benchmark_ham_offload.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; exe benchmark_ham_offload_mpi_rma_dyn : benchmark_ham_offload_mpi_rma_dyn_obj boost_program_options - : /mpi//mpi ham_offload_mpi_rma_dyn HAM_DEBUG_ON + : /mpi//mpi ham_offload_mpi_rma_dyn ; # obj benchmark_ham_offload_scif_obj : benchmark_ham_offload.cpp : scif HAM_COMM_SCIF ; @@ -155,7 +155,7 @@ exe ham_offload exe ham_offload_explicit : ham_offload_explicit.cpp ham_offload_mpi_rma_dyn_explicit boost_program_options # : /mpi//mpi HAM_COMM_MPI - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC # : scif HAM_COMM_SCIF ; @@ -171,7 +171,7 @@ exe inner_product_mpi exe inner_product_mpi_rma_dynamic : [ obj inner_product_obj : inner_product.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi -g HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : /mpi//mpi -g HAM_COMM_MPI_RMA_DYNAMIC ; # exe test_data_transfer_scif @@ -186,7 +186,7 @@ exe test_data_transfer_mpi exe test_data_transfer_mpi_rma_dynamic : [ obj test_data_transfer_obj : test_data_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; # exe test_argument_transfer_scif @@ -201,7 +201,7 @@ exe test_argument_transfer_mpi exe test_argument_transfer_mpi_rma_dynamic : [ obj test_argument_transfer_obj : test_argument_transfer.cpp : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ] ham_offload_mpi_rma_dyn boost_program_options - : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC HAM_DEBUG_ON + : /mpi//mpi HAM_COMM_MPI_RMA_DYNAMIC ; # Explicit targets 
(not built by default) From 5279080f8c8fd5fc5d0f0929917e295acf51f50e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 6 Apr 2018 16:53:42 +0200 Subject: [PATCH 022/150] fixed error in unused function --- Jamroot | 5 +++-- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Jamroot b/Jamroot index 6f6dbb2..f832e4e 100644 --- a/Jamroot +++ b/Jamroot @@ -32,14 +32,15 @@ project HAM $(INC) $(BOOST_PATH)/include debug:HAM_DEBUG_ON - debug_mic:HAM_DEBUG_ON +# debug_mic:HAM_DEBUG_ON # intel:"-static-intel" on # off, on, full #speed # off, speed, space "-hstd=c++11" multi # static - : default-build debug release debug_mic release_mic + : default-build release +# : default-build debug release debug_mic release_mic ; # Object files that are compiled the same for all targets diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 1ec104a..6e7f318 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -327,7 +327,7 @@ class communicator { void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); - MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); + MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); } From bf0dfa536c44ca1abf9eb1a023557b529ae26d52 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 7 Apr 2018 18:57:04 +0200 Subject: [PATCH 023/150] made host permanently lock all windows --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 23 +++++++++++++------ 
1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 6e7f318..954d504 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -95,7 +95,7 @@ class communicator { HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) if(uses_rma_) { - MPI_Win_unlock(target_node, communicator::instance().peers[target_node].rma_win); + MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_win); } return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); } @@ -212,6 +212,13 @@ class communicator { MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); } + // get all locks to targets + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); + } + } + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) /* pairwise COMM stuff // both @@ -304,9 +311,10 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); + // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); } // to be used by the host only @@ -315,7 +323,7 @@ class communicator { { req.uses_rma_ = true; - 
MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); } @@ -326,9 +334,10 @@ class communicator { template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { - MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); - MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); + MPI_Win_flush(remote_source.node(), peers[remote_source.node()].rma_win); + // MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); } // to be used by the host @@ -337,7 +346,7 @@ class communicator { { req.uses_rma_ = true; - MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); } From 4e2c5de2b6df268bcebb98b82add21a542bda4ac Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 7 Apr 2018 19:16:20 +0200 Subject: [PATCH 024/150] fixed send_data() for target-target-copy --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp 
b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 954d504..3922e04 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -215,7 +215,7 @@ class communicator { // get all locks to targets if (is_host()) { for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks so host won't need to unlock for target-target-copy } } @@ -311,10 +311,10 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); - // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); + // MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); + MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); } // to be used by the host only From 292369ff5096e54ac86b765e0b80238b6db27063 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 12 Apr 2018 15:49:21 +0200 Subject: [PATCH 025/150] changed benchmark for new backend --- src/benchmark_ham_offload.cpp | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/benchmark_ham_offload.cpp b/src/benchmark_ham_offload.cpp index 90a0f64..3e55ec7 100644 --- a/src/benchmark_ham_offload.cpp +++ b/src/benchmark_ham_offload.cpp @@ -10,6 +10,7 @@ #include #include #include // posix_memalign +//#include #include "ham/util/time.hpp" @@ -124,6 +125,7 @@ int main(int argc, 
char * argv[]) ("allocate,a", boost::program_options::value()->zero_tokens(), "benchmark memory allocation/deallocation on target") ("copy-in,i", boost::program_options::value()->zero_tokens(), "benchmark data copy to target") ("copy-out,o", boost::program_options::value()->zero_tokens(), "benchmark data copy from target") + ("copy-direct,d", boost::program_options::value()->zero_tokens(), "benchmark data copy from target to another target") ("call,c", boost::program_options::value()->zero_tokens(), "benchmark function call on target") ("call-mul,m", boost::program_options::value()->zero_tokens(), "benchmark function call (multiplication) on target") ("async,y", boost::program_options::value()->zero_tokens(), "perform benchmark function calls asynchronously") @@ -157,6 +159,11 @@ int main(int argc, char * argv[]) std::cout << "# COMM_MPI enabled" << std::endl; #else std::cout << "# COMM_MPI disabled" << std::endl; + #endif + #ifdef HAM_COMM_MPI_RMA_DYNAMIC + std::cout << "# COMM_MPI_RMA_DYNAMIC enabled" << std::endl; + #else + std::cout << "# COMM_MPI_RMA_DYNAMIC disabled" << std::endl; #endif #ifdef HAM_COMM_SCIF @@ -258,6 +265,29 @@ int main(int argc, char * argv[]) copy_out_time.to_file(filename + "copy_out_time"); } + if (vm.count("copy-direct")) + { + // first allocate memory + offload::buffer_ptr remote_source = offload::allocate(1, data_size); + offload::buffer_ptr remote_target = offload::allocate(2, data_size); + statistics copy_direct_time(runs, warmup_runs); + + for (size_t i = 0; i < (runs + warmup_runs); ++i) + { + timer clock; + offload_copy_direct(remote_source, remote_target, data_size); + copy_direct_time.add(clock); + } + // free memory + offload_free(remote_source); + offload_free(remote_target); + + cout << "HAM-Offload copy-direct time: " << endl + << header_string_data << endl + << "copy-direct:\t" << copy_direct_time.string() << "\t" << data_size << endl; + copy_direct_time.to_file(filename + "copy_direct_time"); + } + if (vm.count("call")) { 
statistics call_time(runs, warmup_runs); From 3a16160d5b402dda174f7716ece146711341b87a Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 12 Apr 2018 18:03:45 +0200 Subject: [PATCH 026/150] added all ranks permanent window locks, only attach buffers to own window --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 3922e04..61f1f27 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -213,11 +213,13 @@ class communicator { } // get all locks to targets - if (is_host()) { - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks so host won't need to unlock for target-target-copy - } - } + // targets lock to other targets for copies + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + if(i != this_node_) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks because all ranks lock on every target concurrently + } + } + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) /* pairwise COMM stuff @@ -311,10 +313,10 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); // not needed since all ranks have locks on all targets MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); // 
MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); - MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); + // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); } // to be used by the host only @@ -356,10 +358,11 @@ class communicator { T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - // attach to all windows - for (node_t i = 1; i < nodes_; ++i) { + // attach to own window + MPI_Win_attach(peers[this_node_].rma_win, (void*)ptr, n * sizeof(T)); + /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_win, (void*)ptr, n * sizeof(T)); - } + } */ MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called @@ -382,10 +385,11 @@ class communicator { { assert(ptr.node() == this_node_); // NOTE: no dtor is called - // remove from all rma windows - for (node_t i = 1; i < nodes_; ++i) { + // remove from own rma window + MPI_Win_detach(peers[this_node_].rma_win, ptr.get()); + /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_win, ptr.get()); - } + } */ free(static_cast(ptr.get())); } From fd3627706d7c1f44e2012a041aa1c05aa38ce873 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Thu, 12 Apr 2018 18:32:53 +0200 Subject: [PATCH 027/150] fixed missing flush for copy --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 61f1f27..ea50eff 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -315,7 +315,7 @@ class communicator { // execute transfer // 
MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); // not needed since all ranks have locks on all targets MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - // MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); } From 3b22ec1b0695e19977662d9e08b2dca16a609b47 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 20 Apr 2018 17:33:09 +0200 Subject: [PATCH 028/150] initial commit of truly one-sided rma backend --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 299 ++++++++++++++---- include/ham/offload/offload_msg.hpp | 4 +- 2 files changed, 239 insertions(+), 64 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index ea50eff..2e29c0c 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -17,6 +17,7 @@ #include "ham/misc/types.hpp" #include "ham/util/debug.hpp" #include "ham/util/log.hpp" +#include "communicator.hpp" namespace ham { namespace net { @@ -64,40 +65,52 @@ class node_descriptor class communicator { public: - // externally used interface of request must be shared across all communicator-implementations + enum { + NO_BUFFER_INDEX = constants::MSG_BUFFERS, // invalid buffer index (max valid + 1) + FLAG_FALSE = constants::MSG_BUFFERS + 1 // special value, outside normal index range + }; + + // externally used interface of request must be shared across all communicator-implementations class request { public: request() : valid_(false) {} // instantiate invalid - request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) - : target_node(target_node), 
source_node(source_node), valid_(true), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0), uses_rma_(false) + request(node_t target_node, node_t source_node, size_t remote_buffer_index, size_t local_buffer_index) + : target_node(target_node), source_node(source_node), valid_(true), remote_buffer_index(remote_buffer_index), local_buffer_index(local_buffer_index), req_count(0), uses_rma_(false) {} // return true if request was finished // will not work as intended for rma ops, no equivalent to test() available for remote completion bool test() { - int flag = 0; - MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + // int flag = 0; + + // MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + /* if(uses_rma_) { HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) } return flag != 0; + */ + return communicator::instance().test_local_flag(target_node, local_buffer_index); } void* get() // blocks { - HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + /* + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) if(uses_rma_) { - MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_win); + MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_data_win); } return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); + */ + return communicator::instance().recv_msg(target_node, local_buffer_index); } template @@ -106,8 +119,8 
@@ class communicator { assert(communicator::this_node() == target_node); // this assert fails if send_result is called from the wrong side // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else - MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); - //communicator::instance().send_msg(source_node, source_buffer_index, NO_BUFFER_INDEX, result_msg, size); + // MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + communicator::instance().send_msg(source_node, local_buffer_index, NO_BUFFER_INDEX, result_msg, size); } bool valid() const @@ -135,8 +148,8 @@ class communicator { // only needed by the sender enum { NUM_REQUESTS = 3 }; - size_t send_buffer_index; // buffer to use for sending the message - size_t recv_buffer_index; // buffer to use for receiving the result + size_t remote_buffer_index; // buffer to use for sending the message + size_t local_buffer_index; // buffer to use for receiving the result size_t req_count; private: @@ -194,7 +207,7 @@ class communicator { MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) - + /* if (is_host()) { for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable @@ -205,18 +218,76 @@ class communicator { peers[i].buffer_pool.add(j - 1); } } - } + }*/ + + // initialise all windows + for (node_t i = 0; i < nodes_; ++i) { + // dynamic data window + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_data_win)); + + if (i == this_node_) { // create local windows with allocated memory for targets, host creates one inbound set of windows for all targets + // allocate memory + if (this_node_ == host_node_) { + // MSG_SIZE/FLAG_SIZE * 
MSG_BUFFERS * num_nodes for host + peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + // fill resource pools + for (size_t j = 0; j < nodes_; ++j) { + for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { + peers[j].local_buffer_pool.add(k - 1); + peers[j].remote_buffer_pool.add(k - 1); + } + // allocate first next_request, + allocate_next_request(j); + } + } else { + // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS for targets + peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + } - // initialise 1 global window per target - for (node_t i = 1; i < nodes_; ++i) { - MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); + // create windows + MPI_Win_create(&(peers[this_node_].msg_data), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); + MPI_Win_create(&(peers[this_node_].flag_data), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + + } else { //create remote windows without memory + void* dump; + MPI_Win_create(dump, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_win)); + MPI_Win_create(dump, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); + + //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].msg_win_data, &(peers[i].rma_msg_win)); + //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); + } } +/* + // initialise all windows for target -> host + for (node_t i = 1; i < nodes_; ++i) { + if (is_host()) { + // create local wins with memory for all targets + // allocate memory + + + // create window + MPI_Win_create(memptr, SIZE, 
MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].local_msg_win)); + MPI_Win_create(memptr, SIZE, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].local_flag_win)); + } else { + // create remote wins without memory for host + if (i == this_node_) { + MPI_Win_create(memptr, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[host_node_].local_msg_win)); + MPI_Win_create(memptr, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[host_node_].local_msg_win)); + + } + } + } +*/ // get all locks to targets // targets lock to other targets for copies - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - if(i != this_node_) { - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks because all ranks lock on every target concurrently + for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + if (i != this_node_) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_data_win); // shared locks because all ranks lock on every target concurrently + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].msg_win); // shared locks because all ranks lock on every target concurrently + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].flag_win); // shared locks because all ranks lock on every target concurrently } } @@ -235,7 +306,7 @@ class communicator { MPI_Group_free(&pairwise_group); // no longer needed after COMM is created // init win to target - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_win)); + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_data_win)); // targets // init comm to host from pairwise subgroup const int members[2] = {host_node_, this_node_}; // NOTE: this implies new group rank = 0 for host, 1 for target @@ -245,7 +316,7 @@ class communicator { MPI_Group_free(&pairwise_group); // no longer needed after COMM is created // init win to host - MPI_Win_create_dynamic(MPI_INFO_NULL, 
peers[host_node_].rma_comm, &(peers[host_node_].rma_win)); + MPI_Win_create_dynamic(MPI_INFO_NULL, peers[host_node_].rma_comm, &(peers[host_node_].rma_data_win)); */ } @@ -256,17 +327,33 @@ class communicator { HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) } - + // this is only used by the host + const request& allocate_next_request(node_t remote_node) + { + // this allocates a host-managed index for the remote nodes msg and flag buffers + // so the host knows which buffers are available on the target + const size_t remote_buffer_index = peers[remote_node].remote_buffer_pool.allocate(); + // this allocates an index for the hosts large msg and flag buffers + // request is included in offload message, so target knows into which buffers answers must be written + // when used, the index will need to be added to an offset determined by a targets rank to address the part of the buffer belonging to this target + // NOTE: the actual host buffer is stored at the hosts peers[0], but the buffer_pools are stored at the corresponding peers[target] + // buffer_pools manage idices within the targets section of the hosts buffer + const size_t local_buffer_index = peers[remote_node].local_buffer_pool.allocate(); + + peers[remote_node].next_request = { remote_node, this_node_, remote_buffer_index, local_buffer_index}; + + return peers[remote_node].next_request; + } + + // only used by host request allocate_request(node_t remote_node) { - HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) - const size_t send_buffer_index = peers[remote_node].buffer_pool.allocate(); - const size_t recv_buffer_index = peers[remote_node].buffer_pool.allocate(); - - return { remote_node, this_node_, send_buffer_index, recv_buffer_index }; + return peers[remote_node].next_request; } + // only used by host void free_request(request& 
req) { assert(req.valid()); @@ -274,27 +361,89 @@ class communicator { mpi_peer& peer = peers[req.target_node]; - peer.buffer_pool.free(req.send_buffer_index); - peer.buffer_pool.free(req.recv_buffer_index); + // set flags to false + // local flag inside large host flag buffer @ peers[host] + // index offset computed using target node + size_t offset = sizeof(cache_line_buffer) * constants::MSG_BUFFERS * req.target_node; + volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + req.local_buffer_index]); + *local_flag= FLAG_FALSE; + // remote flag on target + size_t remote_flag = FLAG_FALSE; + MPI_Put(&remote_flag, 1, MPI_INT64_T, req.target_node, 0, 1, MPI_INT64_T, peer.flag_win); + // flush? don't think so + + peer.remote_buffer_pool.free(req.remote_buffer_index); + peer.local_buffer_pool.free(req.local_buffer_index); + req.valid_ = false; } public: + // make private?! + // only called by host + // called by func below + void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { + // write msg to target msg buffer + MPI_Put(msg, size, MPI_BYTE, node, buffer_index, size, MPI_BYTE, peers[node].msg_win); + + // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here + + // write flag to target flags buffer + // not sure on the size here? 
+ MPI_Put(&next_buffer_index, 1, MPI_INT64_T, node, buffer_index, 1, MPI_INT64_T, peers[node].flag_win); + } + + // only called by host void send_msg(request_reference_type req, void* msg, size_t size) { - // copy message from caller into transfer buffer + /* + // copy message from caller into transfer buffer void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); memcpy(msg_buffer, msg, size); MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); - } - + */ + + const request& next_req = allocate_next_request(req.target_node); // allocate_next_req needed?? + send_msg(req.target_node, req.remote_buffer_index, next_req.remote_buffer_index, msg, size); + } + + // make private?! + // called by function below + void * recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) + { + buffer_index = buffer_index == NO_BUFFER_INDEX ? peers[node].next_flag : buffer_index; + + volatile size_t* local_flag; + + if (this_node_ == host_node_) { + size_t offset = sizeof(cache_line_buffer) * constants::MSG_BUFFERS * node; + local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + buffer_index]); + } else { + local_flag = reinterpret_cast(&peers[node].flag_data.get()[buffer_index]); + } + + + while (*local_flag == FLAG_FALSE); // poll on flag for completion + + if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on + peers[node].next_flag = *local_flag; + + if (this_node_ == host_node_) { + size_t offset = sizeof(msg_buffer) * constants::MSG_BUFFERS * node; + return &peers[host_node_].msg_data.get()[offset + buffer_index]; + } else { + return &peers[node].msg_data.get()[buffer_index]; + } + } + // to be used by the offload target's main loop: synchronously receive one message at a time // NOTE: the local static receive buffer! 
void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) { - static msg_buffer buffer; // NOTE ! + /* static msg_buffer buffer; // NOTE ! MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - return static_cast(&buffer); + return static_cast(&buffer); */ + return recv_msg(host_node_, NO_BUFFER_INDEX, msg, size); } // trigger receiving the result of a message on the sending side @@ -302,10 +451,17 @@ class communicator { { // nothing todo here, since this communicator implementation uses one-sided communication // the data is already where it is expected (in the buffer referenced in req) - MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + + // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); return; } + bool test_local_flag(node_t node, size_t buffer_index) + { + volatile size_t * local_flag = reinterpret_cast(&peers[node].flag_data.get()[buffer_index]); + return *local_flag != FLAG_FALSE; + } + // in MPI RMA backend only used by copy // host uses async version // targets don't send data to host as host uses rma get @@ -313,10 +469,10 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); // not needed since all ranks have locks on all targets - MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); - // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_win); + 
// MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_data_win); // not needed since all ranks have locks on all targets + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_data_win); + // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_data_win); } // to be used by the host only @@ -325,8 +481,8 @@ class communicator { { req.uses_rma_ = true; - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); - MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_data_win); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win, &req.next_mpi_request()); } // not used in MPI RMA backend @@ -336,10 +492,10 @@ class communicator { template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { - // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); - MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); - MPI_Win_flush(remote_source.node(), peers[remote_source.node()].rma_win); - // MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_win); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_data_win); + MPI_Get(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, 
peers[remote_source.node()].rma_data_win); + MPI_Win_flush(remote_source.node(), peers[remote_source.node()].rma_data_win); + // MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_data_win); } // to be used by the host @@ -348,8 +504,8 @@ class communicator { { req.uses_rma_ = true; - // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_win); - MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); + // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_data_win); + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); } template @@ -359,9 +515,9 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - MPI_Win_attach(peers[this_node_].rma_win, (void*)ptr, n * sizeof(T)); + MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other - MPI_Win_attach(peers[i].rma_win, (void*)ptr, n * sizeof(T)); + MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); } */ MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); @@ -369,26 +525,29 @@ class communicator { return buffer_ptr(ptr, this_node_, mpi_address); } - // for host to allocate peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers + // for host to allocate peer message buffers, needed because original function now manages dynamic window for data buffers template buffer_ptr allocate_peer_buffer(const size_t n, node_t 
source_node) { - T* ptr; + // TODO DANIEL: this is where mem is allocated that should be mapped to static mpi windows + T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_); + } + // used for data buffers only template void free_buffer(buffer_ptr ptr) { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - MPI_Win_detach(peers[this_node_].rma_win, ptr.get()); + MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other - MPI_Win_detach(peers[i].rma_win, ptr.get()); + MPI_Win_detach(peers[i].rma_data_win, ptr.get()); } */ free(static_cast(ptr.get())); } @@ -397,6 +556,8 @@ class communicator { template void free_peer_buffer(buffer_ptr ptr) { + // TODO DANIEL: this is where mem is freed that should be mapped to static mpi windows + // i dont think this is ever called on the actual memory mapped to static mpi windows, freeing it would equal "disconnecting" corresponding target assert(ptr.node() == this_node_); // NOTE: no dtor is called free(static_cast(ptr.get())); @@ -417,7 +578,7 @@ class communicator { // called to check if an rma path between two targets exists, sufficient to call on one of the two targets bool has_rma_path(node_t target_node) { // check if copy path exists - return !peers[remote_dest.node()].rma_win; + return !peers[remote_dest.node()].rma_data_win; } */ /* @@ -438,7 +599,7 @@ class communicator { MPI_Group_incl(global_group, 2, members, &pairwise_group); MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[target_node].rma_comm)); MPI_Group_free(&pairwise_group); // no longer needed after COMM is created - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[target_node].rma_comm, &(peers[target_node].rma_win)); + MPI_Win_create_dynamic(MPI_INFO_NULL, 
peers[target_node].rma_comm, &(peers[target_node].rma_data_win)); } } */ @@ -450,19 +611,33 @@ class communicator { node_t host_node_; std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions - struct mpi_peer { - buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender + struct mpi_peer { + buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // buffers used for MPI_RPut and RGet // needed by sender to manage which buffers are in use and which are free // just manages indices, that can be used by - detail::resource_pool buffer_pool; + detail::resource_pool local_buffer_pool; + detail::resource_pool remote_buffer_pool; + + request next_request; + size_t next_flag = 0; - // mpi rma dynamic window - MPI_Win rma_win; + // NOTE: behind these buffers are MSG_BUFFERS many buffers of size MSG_SIZE/CACHE_LINE_SIZE, indices are managed by buffer_pool + + // static window for inbound rma messages + buffer_ptr msg_data; + MPI_Win msg_win; + // static window for inbound message flags + buffer_ptr flag_data; + MPI_Win flag_win; + + // mpi rma dynamic window for data + MPI_Win rma_data_win; }; - + + mpi_peer* peers; -}; + }; template buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index cb8a5a8..97c5e95 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -134,7 +134,7 @@ class offload_read_msg size_t n; }; -//#ifdef HAM_COMM_MPI_RMA_DYNAMIC +#ifdef HAM_COMM_MPI_RMA_DYNAMIC template class ExecutionPolicy = default_execution_policy> class offload_rma_copy_msg : public active_msg, ExecutionPolicy> @@ -163,7 +163,7 @@ class offload_read_msg T* local_source; size_t n; }; -//#endif +#endif /* // allows user to setup an rma link between two targets without a copy transfer From cb3246e6e652c629e3a475fad189e1d25c2edaab Mon 
Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 14:06:41 +0200 Subject: [PATCH 029/150] fixed recv_msg checking flag buffer @ wrong peer --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 2e29c0c..8449a6d 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -380,20 +380,18 @@ class communicator { public: // make private?! - // only called by host // called by func below void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { // write msg to target msg buffer MPI_Put(msg, size, MPI_BYTE, node, buffer_index, size, MPI_BYTE, peers[node].msg_win); // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - + MPI_Win_flush(node ,peers[node].msg_win); // write flag to target flags buffer // not sure on the size here? 
MPI_Put(&next_buffer_index, 1, MPI_INT64_T, node, buffer_index, 1, MPI_INT64_T, peers[node].flag_win); } - // only called by host void send_msg(request_reference_type req, void* msg, size_t size) { /* @@ -419,7 +417,7 @@ class communicator { size_t offset = sizeof(cache_line_buffer) * constants::MSG_BUFFERS * node; local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + buffer_index]); } else { - local_flag = reinterpret_cast(&peers[node].flag_data.get()[buffer_index]); + local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); } From 8b0c0e31f2108eb0f38970c9193948e8dc49abe6 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 14:14:09 +0200 Subject: [PATCH 030/150] fixed recv_msg returning wrong peer buffer --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 8449a6d..fa87caa 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -430,7 +430,7 @@ class communicator { size_t offset = sizeof(msg_buffer) * constants::MSG_BUFFERS * node; return &peers[host_node_].msg_data.get()[offset + buffer_index]; } else { - return &peers[node].msg_data.get()[buffer_index]; + return &peers[this_node_].msg_data.get()[buffer_index]; } } From 72f9f232b3509955851e54912b1123e930af139e Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 14:46:33 +0200 Subject: [PATCH 031/150] fixed wrong offset computation for host buffers --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index fa87caa..89acb24 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ 
-234,6 +234,7 @@ class communicator { // fill resource pools for (size_t j = 0; j < nodes_; ++j) { for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { + // target buffers peers[j].local_buffer_pool.add(k - 1); peers[j].remote_buffer_pool.add(k - 1); } @@ -364,7 +365,7 @@ class communicator { // set flags to false // local flag inside large host flag buffer @ peers[host] // index offset computed using target node - size_t offset = sizeof(cache_line_buffer) * constants::MSG_BUFFERS * req.target_node; + size_t offset = constants::MSG_BUFFERS * req.target_node; volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + req.local_buffer_index]); *local_flag= FLAG_FALSE; // remote flag on target @@ -414,7 +415,7 @@ class communicator { volatile size_t* local_flag; if (this_node_ == host_node_) { - size_t offset = sizeof(cache_line_buffer) * constants::MSG_BUFFERS * node; + size_t offset = constants::MSG_BUFFERS * node; local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + buffer_index]); } else { local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); @@ -427,7 +428,7 @@ class communicator { peers[node].next_flag = *local_flag; if (this_node_ == host_node_) { - size_t offset = sizeof(msg_buffer) * constants::MSG_BUFFERS * node; + size_t offset = constants::MSG_BUFFERS * node; return &peers[host_node_].msg_data.get()[offset + buffer_index]; } else { return &peers[this_node_].msg_data.get()[buffer_index]; From 85d19896b86adb20ce66e5f23951c76856d2a26d Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 15:56:21 +0200 Subject: [PATCH 032/150] added flag buffer init with FLAG_FALSE --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 89acb24..2af3512 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ 
b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -231,6 +231,7 @@ class communicator { // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS * num_nodes for host peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + reset_flags(peers[this_node_].flag_data); // fill resource pools for (size_t j = 0; j < nodes_; ++j) { for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { @@ -245,6 +246,7 @@ class communicator { // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS for targets peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + reset_flags(peers[this_node_].flag_data); } // create windows @@ -461,6 +463,18 @@ class communicator { return *local_flag != FLAG_FALSE; } + void reset_flags(buffer_ptr flags) + { + cache_line_buffer fill_value; + cache_line_buffer* fill_value_ptr = &fill_value; + // null fill_value + std::fill(reinterpret_cast(fill_value_ptr), reinterpret_cast(fill_value_ptr) + sizeof(cache_line_buffer), 0); + // set to flag false + *reinterpret_cast(fill_value_ptr) = FLAG_FALSE; + // set all flags to fill_value + std::fill(flags.get(), flags.get() + constants::MSG_BUFFERS, fill_value); + } + // in MPI RMA backend only used by copy // host uses async version // targets don't send data to host as host uses rma get From 304b71d678a70851567c7bfcc4e5f3703eaa4ef7 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 17:43:37 +0200 Subject: [PATCH 033/150] fixed in-buffer addressing --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 2af3512..b0b7865 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ 
b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -386,13 +386,20 @@ class communicator { // called by func below void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { // write msg to target msg buffer - MPI_Put(msg, size, MPI_BYTE, node, buffer_index, size, MPI_BYTE, peers[node].msg_win); - - // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - MPI_Win_flush(node ,peers[node].msg_win); - // write flag to target flags buffer - // not sure on the size here? - MPI_Put(&next_buffer_index, 1, MPI_INT64_T, node, buffer_index, 1, MPI_INT64_T, peers[node].flag_win); + if (node != host_node_) { // to targets + MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); + + // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here + MPI_Win_flush(node, peers[node].msg_win); + // write flag to target flags buffer + // not sure on the size here? 
+ MPI_Put(&next_buffer_index, sizeof(cache_line_buffer), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(cache_line_buffer), MPI_BYTE, peers[node].flag_win); + } else { // to host, used by send_result + size_t offset = constants::MSG_BUFFERS * this_node_; + MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * (offset + buffer_index), size, MPI_BYTE, peers[node].msg_win); + MPI_Win_flush(node, peers[node].msg_win); + MPI_Put(&next_buffer_index, sizeof(cache_line_buffer), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(cache_line_buffer), MPI_BYTE, peers[node].flag_win); + } } void send_msg(request_reference_type req, void* msg, size_t size) From 24fba45c7f736d40f3ff1e3ed835789b98b95e59 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Mon, 23 Apr 2018 18:18:11 +0200 Subject: [PATCH 034/150] fixed flag init for large host buffer --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index b0b7865..daee37d 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -231,7 +231,7 @@ class communicator { // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS * num_nodes for host peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); - reset_flags(peers[this_node_].flag_data); + reset_flags(peers[this_node_].flag_data, constants::MSG_BUFFERS * nodes_); // fill resource pools for (size_t j = 0; j < nodes_; ++j) { for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { @@ -246,7 +246,7 @@ class communicator { // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS for targets peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); peers[this_node_].flag_data = 
allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - reset_flags(peers[this_node_].flag_data); + reset_flags(peers[this_node_].flag_data, constants::MSG_BUFFERS); } // create windows @@ -470,7 +470,7 @@ class communicator { return *local_flag != FLAG_FALSE; } - void reset_flags(buffer_ptr flags) + void reset_flags(buffer_ptr flags, size_t size) { cache_line_buffer fill_value; cache_line_buffer* fill_value_ptr = &fill_value; @@ -479,7 +479,7 @@ class communicator { // set to flag false *reinterpret_cast(fill_value_ptr) = FLAG_FALSE; // set all flags to fill_value - std::fill(flags.get(), flags.get() + constants::MSG_BUFFERS, fill_value); + std::fill(flags.get(), flags.get() + size, fill_value); } // in MPI RMA backend only used by copy From fdcd288ea281def509901dbd2c58439a2db1516a Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Tue, 24 Apr 2018 14:08:46 +0200 Subject: [PATCH 035/150] fixed window creation buffer pointer --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index daee37d..943822f 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -250,8 +250,8 @@ class communicator { } // create windows - MPI_Win_create(&(peers[this_node_].msg_data), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); - MPI_Win_create(&(peers[this_node_].flag_data), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + MPI_Win_create((peers[this_node_].msg_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); + MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * 
constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); } else { //create remote windows without memory void* dump; From 6de2bfd49214ca34bc309d0296759759baa5162f Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Tue, 24 Apr 2018 14:21:43 +0200 Subject: [PATCH 036/150] fixed size for put of flag --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 943822f..86a8a38 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -393,12 +393,12 @@ class communicator { MPI_Win_flush(node, peers[node].msg_win); // write flag to target flags buffer // not sure on the size here? - MPI_Put(&next_buffer_index, sizeof(cache_line_buffer), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(cache_line_buffer), MPI_BYTE, peers[node].flag_win); + MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); } else { // to host, used by send_result size_t offset = constants::MSG_BUFFERS * this_node_; MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * (offset + buffer_index), size, MPI_BYTE, peers[node].msg_win); MPI_Win_flush(node, peers[node].msg_win); - MPI_Put(&next_buffer_index, sizeof(cache_line_buffer), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(cache_line_buffer), MPI_BYTE, peers[node].flag_win); + MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); } } From e0cc1afb3d1bb93709767d301af7f5e106d28832 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Tue, 24 Apr 2018 16:29:52 +0200 Subject: [PATCH 037/150] fixed 
displacement on host window --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 86a8a38..bde7d13 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -250,13 +250,13 @@ class communicator { } // create windows - MPI_Win_create((peers[this_node_].msg_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); - MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + MPI_Win_create((peers[this_node_].msg_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); + MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); } else { //create remote windows without memory void* dump; - MPI_Win_create(dump, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_win)); - MPI_Win_create(dump, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); + MPI_Win_create(dump, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_win)); + MPI_Win_create(dump, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].msg_win_data, &(peers[i].rma_msg_win)); //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); From 339486d0a02c110741c64f5b6af519774b9248b3 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Tue, 24 Apr 2018 17:53:17 +0200 Subject: [PATCH 038/150] fixed waiting for reply msg for data transfers --- 
include/ham/net/communicator_mpi_rma_dynamic.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index bde7d13..20e8e99 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -100,17 +100,19 @@ class communicator { void* get() // blocks { - /* + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) if(uses_rma_) { MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_data_win); + // this is just a dummy return, there is no reply from the target for rma data transfers + // TODO, Daniel - design decision on what to return here + return static_cast(&communicator::instance().peers[communicator::this_node()].msg_data[local_buffer_index]); + } else { + return communicator::instance().recv_msg(target_node, local_buffer_index); } - return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); - */ - return communicator::instance().recv_msg(target_node, local_buffer_index); } template From 3f6c30694d49fd8e8e22f7750de82dcdc695377f Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:45:23 +0200 Subject: [PATCH 039/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 20e8e99..f04a3b6 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -419,7 +419,7 @@ class communicator { // make private?! 
// called by function below - void * recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) + void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) { buffer_index = buffer_index == NO_BUFFER_INDEX ? peers[node].next_flag : buffer_index; @@ -537,6 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window + HAM_DEBUG( cout << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -567,6 +568,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window + HAM_DEBUG( cout << "freeing buffer @: " << ptr << " on node: " << node << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From 81675fb08a0e6cee6182d893a140be71e42c5ebc Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:46:35 +0200 Subject: [PATCH 040/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index f04a3b6..7906b54 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -568,7 +568,7 @@ class communicator { assert(ptr.node() == 
this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( cout << "freeing buffer @: " << ptr << " on node: " << node << std::endl; ) + HAM_DEBUG( cout << "freeing buffer @: " << ptr << " on node: " << this_node_ << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From 7e3ead35a2d548a5f11ea1a4bdca2d01c67209b5 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:47:37 +0200 Subject: [PATCH 041/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 7906b54..cbe23b6 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( cout << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) + HAM_DEBUG( std::cout << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -568,7 +568,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( cout << "freeing buffer @: " << ptr << " on node: " << this_node_ << std::endl; ) + HAM_DEBUG( std::cout << "freeing buffer @: " << ptr << " on node: " << 
this_node_ << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From 0d0923a3b33c0d787ccb032adff9bf7ce25ea9f3 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:49:59 +0200 Subject: [PATCH 042/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index cbe23b6..aa64160 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( std::cout << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) + HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -568,7 +568,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( std::cout << "freeing buffer @: " << ptr << " on node: " << this_node_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr << " on node: " << this_node_ << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to 
attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From d7fa6abfa7d456f33f221e41e7731794314f695c Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:52:04 +0200 Subject: [PATCH 043/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index aa64160..4580664 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << " on node: " << node << std::endl; ) + HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << " on node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); From fc5e2e58ae904f6a6054a87256ee0c91bd36758b Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:54:15 +0200 Subject: [PATCH 044/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 4580664..556f75e 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,8 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << 
"allocated buffer @: " << ptr << " on node: " << source_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << std::endl; ) + HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -568,7 +569,8 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr << " on node: " << this_node_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr << std::endl; ) + HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From cf40181e8ba7d9dc192e7ea94cbddd0102291d50 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:55:58 +0200 Subject: [PATCH 045/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 556f75e..fd6003c 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -569,8 +569,8 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr << std::endl; ) - HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "freeing buffer @: " 
<< (long)ptr << std::endl; ) + HAM_DEBUG( HAM_LOG << "on node: " << this_node_n << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From e31cb9bcc0a0945e7f4eef7f31f406877e63bda9 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:57:06 +0200 Subject: [PATCH 046/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index fd6003c..845966e 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -570,7 +570,7 @@ class communicator { // NOTE: no dtor is called // remove from own rma window HAM_DEBUG( HAM_LOG << "freeing buffer @: " << (long)ptr << std::endl; ) - HAM_DEBUG( HAM_LOG << "on node: " << this_node_n << std::endl; ) + HAM_DEBUG( HAM_LOG << "on node: " << this_node_ << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From fbaaf63e9376fc05054aa923d0b3685a12799683 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 14:59:01 +0200 Subject: [PATCH 047/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 845966e..44c5066 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ 
b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << std::endl; ) + HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr.get() << std::endl; ) HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other @@ -569,7 +569,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "freeing buffer @: " << (long)ptr << std::endl; ) + HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr.get() << std::endl; ) HAM_DEBUG( HAM_LOG << "on node: " << this_node_ << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other From 383eb93c64667e4e350f8f5b92e850d34e840e81 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 15:00:38 +0200 Subject: [PATCH 048/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 44c5066..3bd2d16 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,7 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr.get() << std::endl; ) + HAM_DEBUG( HAM_LOG << 
"allocated buffer @: " << ptr << std::endl; ) HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other From d2b08f54224039aa4f35c6b1cf492974fc2ac941 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 15:10:13 +0200 Subject: [PATCH 049/150] added debug output to track double-free issue --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 3bd2d16..1798bc5 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -537,8 +537,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "allocated buffer @: " << ptr << std::endl; ) - HAM_DEBUG( HAM_LOG << "on node: " << source_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "allocating buffer @: " << (long)ptr << "belonging to node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -569,8 +568,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "freeing buffer @: " << ptr.get() << std::endl; ) - HAM_DEBUG( HAM_LOG << "on node: " << this_node_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) 
MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From 26926631f057e5f8a233c0c02d43bdde9139b13f Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 16:28:36 +0200 Subject: [PATCH 050/150] introduced alternate request allocation for data transfers --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 10 ++++++++-- include/ham/offload/offload.hpp | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 1798bc5..fce6410 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -358,6 +358,12 @@ class communicator { return peers[remote_node].next_request; } + // used for async rma data transfers, so they wont take up buffer indices they dont need + request allocate_data_request(node_t remote_node) { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + return { remote_node, this_node_, NO_BUFFER_INDEX, NO_BUFFER_INDEX }; + } + // only used by host void free_request(request& req) { @@ -537,7 +543,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "allocating buffer @: " << (long)ptr << "belonging to node: " << source_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << " belonging to node: " << source_node << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other 
MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -568,7 +574,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 1c2e78c..a315c50 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -235,7 +235,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) return result; #elif HAM_COMM_MPI_RMA_DYNAMIC - future result(comm.allocate_request(remote_dest.node())); + future result(comm.allocate_data_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." << std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); return result; @@ -273,7 +273,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) return result; #elif defined HAM_COMM_MPI_RMA_DYNAMIC - future result(comm.allocate_request(remote_source.node())); + future result(comm.allocate_data_request(remote_source.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." 
<< std::endl; ) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); return result; From ebc3feba9b8d8b5d0460c8fbf392edabdf1b9c17 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 17:10:53 +0200 Subject: [PATCH 051/150] fixed data requests freeing indices @ invalidation --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index fce6410..87ef0a4 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -383,8 +383,14 @@ class communicator { MPI_Put(&remote_flag, 1, MPI_INT64_T, req.target_node, 0, 1, MPI_INT64_T, peer.flag_win); // flush? don't think so - peer.remote_buffer_pool.free(req.remote_buffer_index); - peer.local_buffer_pool.free(req.local_buffer_index); + // only free buffer indices if they are valid + // necessary to avoid data transfer requests that do not allocate indices messing up the index pools + if(req.remote_buffer_index < NO_BUFFER_INDEX ) { + peer.remote_buffer_pool.free(req.remote_buffer_index); + } + if(req.local_buffer_index < NO_BUFFER_INDEX) { + peer.local_buffer_pool.free(req.local_buffer_index); + } req.valid_ = false; } @@ -574,7 +580,7 @@ class communicator { assert(ptr.node() == this_node_); // NOTE: no dtor is called // remove from own rma window - HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::free_buffer(), freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other 
MPI_Win_detach(peers[i].rma_data_win, ptr.get()); From c0894ff6f469015768325596b099a968b84e2a04 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 17:21:55 +0200 Subject: [PATCH 052/150] fixed data requests freeing indices @ invalidation --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 87ef0a4..9d46ace 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -380,7 +380,7 @@ class communicator { *local_flag= FLAG_FALSE; // remote flag on target size_t remote_flag = FLAG_FALSE; - MPI_Put(&remote_flag, 1, MPI_INT64_T, req.target_node, 0, 1, MPI_INT64_T, peer.flag_win); + MPI_Put(&remote_flag, sizeof(remote_flag), MPI_BYTE, req.target_node, 0, sizeof(remote_flag), MPI_BYTE, peer.flag_win); // flush? don't think so // only free buffer indices if they are valid From 49179641e1abfde85da4256891646bdba39b65c3 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 17:30:13 +0200 Subject: [PATCH 053/150] fixed data requests freeing indices @ invalidation --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 9d46ace..e5f7f3e 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -369,8 +369,13 @@ class communicator { { assert(req.valid()); assert(req.source_node == this_node_); - - mpi_peer& peer = peers[req.target_node]; + + // dont do any of the following for data transfer requests + if(req.remote_buffer_index == NO_BUFFER_INDEX ) { + return; + } + + mpi_peer& peer = peers[req.target_node]; // set flags to false // local flag inside large host flag buffer @ 
peers[host] @@ -383,17 +388,13 @@ class communicator { MPI_Put(&remote_flag, sizeof(remote_flag), MPI_BYTE, req.target_node, 0, sizeof(remote_flag), MPI_BYTE, peer.flag_win); // flush? don't think so - // only free buffer indices if they are valid - // necessary to avoid data transfer requests that do not allocate indices messing up the index pools - if(req.remote_buffer_index < NO_BUFFER_INDEX ) { - peer.remote_buffer_pool.free(req.remote_buffer_index); - } - if(req.local_buffer_index < NO_BUFFER_INDEX) { - peer.local_buffer_pool.free(req.local_buffer_index); - } - req.valid_ = false; - } + peer.remote_buffer_pool.free(req.remote_buffer_index); + + peer.local_buffer_pool.free(req.local_buffer_index); + + req.valid_ = false; + } public: // make private?! From 112e5b883b05f7a0e410389b3a651b88c3ba58b5 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 17:37:53 +0200 Subject: [PATCH 054/150] fixed data requests freeing indices @ invalidation --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index e5f7f3e..b44ff62 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -377,6 +377,7 @@ class communicator { mpi_peer& peer = peers[req.target_node]; + // set flags to false // local flag inside large host flag buffer @ peers[host] // index offset computed using target node @@ -384,10 +385,11 @@ class communicator { volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + req.local_buffer_index]); *local_flag= FLAG_FALSE; // remote flag on target + /* This is done by the target after having reveived the new index to poll on size_t remote_flag = FLAG_FALSE; MPI_Put(&remote_flag, sizeof(remote_flag), MPI_BYTE, req.target_node, 0, sizeof(remote_flag), MPI_BYTE, peer.flag_win); // flush? 
don't think so - + */ peer.remote_buffer_pool.free(req.remote_buffer_index); @@ -451,6 +453,8 @@ class communicator { if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on peers[node].next_flag = *local_flag; + *local_flag = FLAG_FALSE; + if (this_node_ == host_node_) { size_t offset = constants::MSG_BUFFERS * node; return &peers[host_node_].msg_data.get()[offset + buffer_index]; From 314423e0060dfbea7114b63c3f0c25b882544287 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 18:12:28 +0200 Subject: [PATCH 055/150] trying withou flush --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index b44ff62..5b80fdd 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -407,7 +407,7 @@ class communicator { MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - MPI_Win_flush(node, peers[node].msg_win); + //MPI_Win_flush(node, peers[node].msg_win); // write flag to target flags buffer // not sure on the size here? 
MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); @@ -554,7 +554,7 @@ class communicator { //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // attach to own window - HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << " belonging to node: " << source_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); From f0ebfd5e128c05cc43eaabe30dd3631bdb40af8c Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Fri, 27 Apr 2018 18:20:06 +0200 Subject: [PATCH 056/150] trying withou flush --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 5b80fdd..1eb5d7a 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -414,7 +414,7 @@ class communicator { } else { // to host, used by send_result size_t offset = constants::MSG_BUFFERS * this_node_; MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * (offset + buffer_index), size, MPI_BYTE, peers[node].msg_win); - MPI_Win_flush(node, peers[node].msg_win); + //MPI_Win_flush(node, peers[node].msg_win); MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); } } From b2ebd109b693e82557f91837ec9d84f6bb46cada Mon Sep 17 
00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 12:28:11 +0200 Subject: [PATCH 057/150] added logging to send/recv msg --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 23 ++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 1eb5d7a..aec8598 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -403,19 +403,33 @@ class communicator { // called by func below void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { // write msg to target msg buffer + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): node = " << node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): remote buffer index = " << buffer_index << std::endl; ) + if (node != host_node_) { // to targets MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - //MPI_Win_flush(node, peers[node].msg_win); + MPI_Win_flush(node, peers[node].msg_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) + // write flag to target flags buffer // not sure on the size here? 
MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) + } else { // to host, used by send_result size_t offset = constants::MSG_BUFFERS * this_node_; MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * (offset + buffer_index), size, MPI_BYTE, peers[node].msg_win); - //MPI_Win_flush(node, peers[node].msg_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) + + MPI_Win_flush(node, peers[node].msg_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) + MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) + } } @@ -437,6 +451,8 @@ class communicator { void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) { buffer_index = buffer_index == NO_BUFFER_INDEX ? 
peers[node].next_flag : buffer_index; + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) volatile size_t* local_flag; @@ -447,8 +463,9 @@ class communicator { local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); } - + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) while (*local_flag == FLAG_FALSE); // poll on flag for completion + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on peers[node].next_flag = *local_flag; From d2c555291fc381b3e5b784971a49008eaf92a83c Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 13:33:37 +0200 Subject: [PATCH 058/150] added time logging to recv msg --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index aec8598..e412cc1 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -17,6 +17,7 @@ #include "ham/misc/types.hpp" #include "ham/util/debug.hpp" #include "ham/util/log.hpp" +#include "ham/util/time.hpp" #include "communicator.hpp" namespace ham { @@ -450,6 +451,9 @@ class communicator { // called by function below void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) { + statistics pre_poll(1,0); + statistics poll(1,0); + timer t1; buffer_index = buffer_index == NO_BUFFER_INDEX ? 
peers[node].next_flag : buffer_index; HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) @@ -464,8 +468,14 @@ class communicator { } HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) + pre_poll.add(t1); + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << pre_poll.min() << std::endl; ) + timer t2; while (*local_flag == FLAG_FALSE); // poll on flag for completion + poll.add(t2); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling took: " << poll.min() << std::endl; ) + if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on peers[node].next_flag = *local_flag; From fdc1b9e0f74ea46f05901c4de1c00bbabb72ecfe Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 13:39:30 +0200 Subject: [PATCH 059/150] added time logging to recv msg --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index e412cc1..0ad10bc 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -451,9 +451,9 @@ class communicator { // called by function below void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) { - statistics pre_poll(1,0); - statistics poll(1,0); - timer t1; + ham::util::time::statistics pre_poll(1,0); + ham::util::time::statistics poll(1,0); + ham::util::time::timer t1; buffer_index = buffer_index == NO_BUFFER_INDEX ? 
peers[node].next_flag : buffer_index; HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) @@ -470,7 +470,7 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) pre_poll.add(t1); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << pre_poll.min() << std::endl; ) - timer t2; + ham::util::time::timer t2; while (*local_flag == FLAG_FALSE); // poll on flag for completion poll.add(t2); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) From a4039134d52446c1fabcfd1854e631e9aa7ed873 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 13:41:45 +0200 Subject: [PATCH 060/150] added time logging to recv msg --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 0ad10bc..2385ac0 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -469,12 +469,12 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) pre_poll.add(t1); - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << pre_poll.min() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << pre_poll.min().count() << std::endl; ) ham::util::time::timer t2; while (*local_flag == FLAG_FALSE); // poll on flag for completion poll.add(t2); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling took: " << poll.min() << std::endl; ) + HAM_DEBUG( HAM_LOG << 
"communicator::recv_msg(): polling took: " << poll.min().count() << std::endl; ) if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on From 6e84d1768d479dcdfbcbd8d961b06c6f75d8648c Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 13:48:55 +0200 Subject: [PATCH 061/150] added time logging to send msg --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 2385ac0..eebd2f2 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -408,17 +408,31 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::send_msg(): remote buffer index = " << buffer_index << std::endl; ) if (node != host_node_) { // to targets + ham::util::time::statistics msg_put(1,0); + ham::util::time::statistics flush(1,0); + ham::util::time::statistics flag_put(1,0); + + ham::util::time::timer t1; MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); + msg_put.add(t1); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took" << msg_put.min().count() << std::endl; ) + // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here + ham::util::time::timer t2; MPI_Win_flush(node, peers[node].msg_win); + flush.add(t2); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took" << flush.min().count() << std::endl; ) // write flag to target flags buffer // not sure on the size here? 
+ ham::util::time::timer t3; MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); + flag_put.add(t3); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took" << flag_put.min().count() < Date: Wed, 2 May 2018 13:54:59 +0200 Subject: [PATCH 062/150] commented out Win_flush when putting msg --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index eebd2f2..96b7483 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -416,15 +416,15 @@ class communicator { MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); msg_put.add(t1); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took" << msg_put.min().count() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took: " << msg_put.min().count() << std::endl; ) // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here ham::util::time::timer t2; - MPI_Win_flush(node, peers[node].msg_win); + //MPI_Win_flush(node, peers[node].msg_win); flush.add(t2); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took" << flush.min().count() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << flush.min().count() << std::endl; ) // write flag to target flags buffer // not sure on the size here? 
@@ -432,14 +432,14 @@ class communicator { MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); flag_put.add(t3); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took" << flag_put.min().count() < Date: Wed, 2 May 2018 13:58:49 +0200 Subject: [PATCH 063/150] added time logging to send msg (target) --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 96b7483..3eeb2e6 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -435,15 +435,28 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took: " << flag_put.min().count() < Date: Wed, 2 May 2018 14:58:33 +0200 Subject: [PATCH 064/150] removed unneeded target-target window locks --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 3eeb2e6..852f34e 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -292,8 +292,16 @@ class communicator { for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable if (i != this_node_) { MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_data_win); // shared locks because all ranks lock on every target concurrently - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].msg_win); // shared locks because all ranks lock on every target concurrently - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].flag_win); // shared locks 
because all ranks lock on every target concurrently + } + } + + if (this_node_ != host_node_) { // targets + MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].msg_win); + MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].flag_win); + } else { // host + for (node_t i = 0; i < nodes_; ++i) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].msg_win); + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].flag_win); } } From 178a870a9657a88a41fbf9ed3bdbd36c367f9be2 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 2 May 2018 15:43:46 +0200 Subject: [PATCH 065/150] commented Win_flush when putting msg back in --- include/ham/net/communicator_mpi_rma_dynamic.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 852f34e..493f3c7 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -429,7 +429,7 @@ class communicator { // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here ham::util::time::timer t2; - //MPI_Win_flush(node, peers[node].msg_win); + MPI_Win_flush(node, peers[node].msg_win); flush.add(t2); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << flush.min().count() << std::endl; ) @@ -455,7 +455,7 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took: " << msg_put.min().count() << std::endl; ) ham::util::time::timer t2; - //MPI_Win_flush(node, peers[node].msg_win); + MPI_Win_flush(node, peers[node].msg_win); flush.add(t2); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << flush.min().count() << std::endl; ) From 4ed3efe815f375aa21954aac3f6febac423024a1 Mon Sep 17 00:00:00 2001 From: bemdeppi Date: Mon, 9 Jul 2018 
13:36:32 +0200 Subject: [PATCH 066/150] nonfunctional changes --- Jamroot | 4 +- .../ham/net/communicator_mpi_rma_dynamic.hpp | 66 +++++++++---------- tools/install_boost.sh | 4 +- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/Jamroot b/Jamroot index f832e4e..16b4f72 100644 --- a/Jamroot +++ b/Jamroot @@ -32,13 +32,13 @@ project HAM $(INC) $(BOOST_PATH)/include debug:HAM_DEBUG_ON -# debug_mic:HAM_DEBUG_ON + debug_mic:HAM_DEBUG_ON # intel:"-static-intel" on # off, on, full #speed # off, speed, space "-hstd=c++11" multi -# static + static : default-build release # : default-build debug release debug_mic release_mic ; diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 493f3c7..f3cee5a 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -17,7 +17,7 @@ #include "ham/misc/types.hpp" #include "ham/util/debug.hpp" #include "ham/util/log.hpp" -#include "ham/util/time.hpp" +// #include "ham/util/time.hpp" #include "communicator.hpp" namespace ham { @@ -416,55 +416,55 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::send_msg(): remote buffer index = " << buffer_index << std::endl; ) if (node != host_node_) { // to targets - ham::util::time::statistics msg_put(1,0); - ham::util::time::statistics flush(1,0); - ham::util::time::statistics flag_put(1,0); + // ham::util::time::statistics msg_put(1,0); + // ham::util::time::statistics flush(1,0); + // ham::util::time::statistics flag_put(1,0); - ham::util::time::timer t1; + // ham::util::time::timer t1; MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); - msg_put.add(t1); + // msg_put.add(t1); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took: " << msg_put.min().count() << std::endl; ) + HAM_DEBUG( HAM_LOG << 
"communicator::send_msg(): writing msg took: " << ""/*msg_put.min().count()*/ << std::endl; ) // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - ham::util::time::timer t2; + // ham::util::time::timer t2; MPI_Win_flush(node, peers[node].msg_win); - flush.add(t2); + // flush.add(t2); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << flush.min().count() << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) // write flag to target flags buffer // not sure on the size here? - ham::util::time::timer t3; + // ham::util::time::timer t3; MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); - flag_put.add(t3); + // flag_put.add(t3); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took: " << flag_put.min().count() < $BUILD_LOG_BB 2>&1 echo "Installing Boost.Build ..." -./b2 install --prefix=$BOOST_INSTALL_PATH >> $BUILD_LOG_BB 2>&1 +./b2 install --prefix=${BOOST_INSTALL_PATH} >> $BUILD_LOG_BB 2>&1 PATH=$BOOST_INSTALL_PATH/bin:$PATH cd ../.. 
From 42d9303203057871c92a93dd06b44f0c1dfb1681 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Thu, 11 Oct 2018 21:51:21 +0200 Subject: [PATCH 067/150] lock-get-unlock protocol --- .../ham/net/communicator_mpi_rma_dynamic.hpp | 51 ++++++++++++++----- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index f3cee5a..f73a559 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -3,8 +3,8 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef ham_net_communicator_mpi_hpp -#define ham_net_communicator_mpi_hpp +#ifndef ham_net_communicator_mpi_rma_dynamic_hpp +#define ham_net_communicator_mpi_rma_dynamic_hpp #include @@ -265,7 +265,8 @@ class communicator { //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); } } -/* + +/* // no longer needed // initialise all windows for target -> host for (node_t i = 1; i < nodes_; ++i) { if (is_host()) { @@ -287,7 +288,7 @@ class communicator { } } */ - // get all locks to targets + // get all locks to targets for data // targets lock to other targets for copies for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable if (i != this_node_) { @@ -295,6 +296,8 @@ class communicator { } } + /* // locking will be done when accessing remote memory + // locks for active message rma transfers if (this_node_ != host_node_) { // targets MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].msg_win); MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].flag_win); @@ -304,7 +307,7 @@ class communicator { MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].flag_win); } } - + */ HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) 
/* pairwise COMM stuff @@ -421,6 +424,7 @@ class communicator { // ham::util::time::statistics flag_put(1,0); // ham::util::time::timer t1; + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_win); MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); // msg_put.add(t1); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) @@ -428,19 +432,23 @@ class communicator { // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here + // unlock includes flush, no need for it here + MPI_Win_unlock(node, peers[node].msg_win); // ham::util::time::timer t2; - MPI_Win_flush(node, peers[node].msg_win); + // MPI_Win_flush(node, peers[node].msg_win); // flush.add(t2); - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) + // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) + // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) // write flag to target flags buffer // not sure on the size here? 
// ham::util::time::timer t3; + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].flag_win); MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); // flag_put.add(t3); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took: " << ""/*flag_put.min().count()*/ <(&peers[host_node_].flag_data.get()[offset + buffer_index]); } else { local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); } + */ HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) // pre_poll.add(t1); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << ""/*pre_poll.min().count()*/ << std::endl; ) // ham::util::time::timer t2; - while (*local_flag == FLAG_FALSE); // poll on flag for completion + + + // needed on host to access the memory belonging to the node from which to receive + size_t offset = (this_node_ == host_node_) ? 
constants::MSG_BUFFERS * node : 0; + + while (received_flag == FLAG_FALSE) { + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].flag_win); + MPI_Get(&received_flag, sizeof(size_t), MPI_BYTE, this_node_, offset , sizeof(size_t), MPI_BYTE, peers[this_node_].flag_win) + MPI_Win_unlock(this_node_, peers[this_node_].flag_win); + } // poll on flag for completion // poll.add(t2); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling took: " << ""/*poll.min().count()*/ << std::endl; ) + // make sure message window is updated locally too + MPI_Win_lock(MPI_LOCK_SHARED, this_node_, 0, peers[this_node_].msg_win); + MPI_Win_unlock(this_node_, peers[this_node_].msg_win); if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on peers[node].next_flag = *local_flag; From 7549843a237f8e8468002c89c146fc70d92082bd Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sun, 21 Oct 2018 00:10:00 +0200 Subject: [PATCH 068/150] test --- include/ham/misc/constants.hpp | 1 + include/ham/net/communicator.hpp | 8 +- .../ham/net/communicator_mpi_rma_dynamic.hpp | 237 +++++++----------- include/ham/offload/offload.hpp | 2 + include/ham/offload/offload_msg.hpp | 2 + 5 files changed, 102 insertions(+), 148 deletions(-) diff --git a/include/ham/misc/constants.hpp b/include/ham/misc/constants.hpp index 28483a9..113fc8c 100644 --- a/include/ham/misc/constants.hpp +++ b/include/ham/misc/constants.hpp @@ -17,6 +17,7 @@ namespace constants { enum net { MSG_SIZE = HAM_MESSAGE_SIZE, MSG_BUFFERS = 256, + FLAG_SIZE = sizeof(size_t), }; enum arch { diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index c754f99..65683c6 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -37,7 +37,13 @@ namespace net { { char data[constants::MSG_SIZE]; }; - + + struct + msg_flag_buffer + { + // buffer 
for fully rma backend. windows will consist of several of those + char data[constants::MSG_SIZE + constants::FLAG_SIZE]; + }; node_t this_node(); } diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index f73a559..08d7815 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -104,13 +104,13 @@ class communicator { HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard - HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) - if(uses_rma_) - { - MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_data_win); - // this is just a dummy return, there is no reply from the target for rma data transfers - // TODO, Daniel - design decision on what to return here - return static_cast(&communicator::instance().peers[communicator::this_node()].msg_data[local_buffer_index]); + // for async get from receive_data_async() this will block until get is completed + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + + if(uses_rma_) { + // this will only be true for async rma data transfers + // there will be no result returned, so this won't poll on anything and return a dummy instead. 
+ return nullptr; } else { return communicator::instance().recv_msg(target_node, local_buffer_index); } @@ -223,19 +223,27 @@ class communicator { } }*/ - // initialise all windows + // initialise data windows for (node_t i = 0; i < nodes_; ++i) { // dynamic data window MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_data_win)); + } + + // initialise message windows + for (node_t i = 0; i < nodes_; ++i) { // loop through ranks if (i == this_node_) { // create local windows with allocated memory for targets, host creates one inbound set of windows for all targets - // allocate memory - if (this_node_ == host_node_) { - // MSG_SIZE/FLAG_SIZE * MSG_BUFFERS * num_nodes for host - peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); - peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); - reset_flags(peers[this_node_].flag_data, constants::MSG_BUFFERS * nodes_); - // fill resource pools + + // allocate memory and create windows + if (this_node_ == host_node_) { // host creates one large window with subsets associated with different targets + + // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS * num_nodes = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) + peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + // set flags to FLAG_FALSE + reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS * nodes_); // TODO: Daniel - this may be bad if buffer structs are not contiguos - check + + // fill resource pools for managing indices on the host for (size_t j = 0; j < nodes_; ++j) { for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { // target buffers @@ -245,49 +253,37 @@ class communicator { // allocate first next_request, allocate_next_request(j); } - } else { - // MSG_SIZE/FLAG_SIZE * 
MSG_BUFFERS for targets - peers[this_node_].msg_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // create window with memory + MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_flag_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); + // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + + } else { // targets create one window with the size of their msg "queue" + // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) + peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // set flags to FLAG_FALSE reset_flags(peers[this_node_].flag_data, constants::MSG_BUFFERS); + + // create window with memory + MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); + // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); } - // create windows - MPI_Win_create((peers[this_node_].msg_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_win)); - MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + // debug msg + HAM_DEBUG( std::cout << "Rank: " << this_node_ << " in loop run " << i << " created REAL windows..." 
<< std::endl; ) + - } else { //create remote windows without memory - void* dump; - MPI_Win_create(dump, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_win)); - MPI_Win_create(dump, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); + } else { // create remote windows without memory (join the collective call and retreive the window handle) + MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_flag_win)); + // MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); + // debug msg + HAM_DEBUG( std::cout << "Rank: " << this_node_ << " in loop run " << i << " creating EMPTY windows..." << std::endl; ) //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].msg_win_data, &(peers[i].rma_msg_win)); //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); } } -/* // no longer needed - // initialise all windows for target -> host - for (node_t i = 1; i < nodes_; ++i) { - if (is_host()) { - // create local wins with memory for all targets - // allocate memory - - - // create window - MPI_Win_create(memptr, SIZE, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].local_msg_win)); - MPI_Win_create(memptr, SIZE, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].local_flag_win)); - - } else { - // create remote wins without memory for host - if (i == this_node_) { - MPI_Win_create(memptr, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[host_node_].local_msg_win)); - MPI_Win_create(memptr, 0, MPI_BYTE, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[host_node_].local_msg_win)); - - } - } - } -*/ // get all locks to targets for data // targets lock to other targets for copies for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable @@ -296,6 +292,9 @@ class communicator { } } + // MPI_Barrier(MPI_COMM_WORLD); + + /* // locking will be done when accessing remote memory // locks for active message rma 
transfers if (this_node_ != host_node_) { // targets @@ -309,33 +308,8 @@ class communicator { } */ - HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) -/* pairwise COMM stuff - // both - // prepare global group to create pairwise groups - MPI_Comm_group(MPI_COMM_WORLD, &global_group); - // host - // init comm to target from pairwise subgroups - const int members[2] = {host_node_, i}; // NOTE: this implies new group rank is 0 for host, 1 for target - MPI_Group pairwise_group; - MPI_Group_incl(global_group, 2, members, &pairwise_group); - MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[i].rma_comm)); - MPI_Group_free(&pairwise_group); // no longer needed after COMM is created - - // init win to target - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[i].rma_comm, &(peers[i].rma_data_win)); - // targets - // init comm to host from pairwise subgroup - const int members[2] = {host_node_, this_node_}; // NOTE: this implies new group rank = 0 for host, 1 for target - MPI_Group pairwise_group; - MPI_Group_incl(global_group, 2, members, &pairwise_group); // should match the corresponding subgroup on host for i = this_node_ - MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[host_node_].rma_comm)); - MPI_Group_free(&pairwise_group); // no longer needed after COMM is created - - // init win to host - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[host_node_].rma_comm, &(peers[host_node_].rma_data_win)); - */ - + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation completed" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): communicator initialization completed" << std::endl; ) } ~communicator() @@ -344,20 +318,20 @@ class communicator { HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) } - // this is only used by the host + // this is only used by the host to manage remote msg buffers and local reply buffers and assign them to requests const request& 
allocate_next_request(node_t remote_node) { - // this allocates a host-managed index for the remote nodes msg and flag buffers + // this allocates a host-managed index for the remote nodes msg "queue" // so the host knows which buffers are available on the target const size_t remote_buffer_index = peers[remote_node].remote_buffer_pool.allocate(); - // this allocates an index for the hosts large msg and flag buffers - // request is included in offload message, so target knows into which buffers answers must be written + // this allocates an index in the hosts "reply queue" + // request is included in offload message, so the target knows into which buffers replys must be written // when used, the index will need to be added to an offset determined by a targets rank to address the part of the buffer belonging to this target // NOTE: the actual host buffer is stored at the hosts peers[0], but the buffer_pools are stored at the corresponding peers[target] // buffer_pools manage idices within the targets section of the hosts buffer const size_t local_buffer_index = peers[remote_node].local_buffer_pool.allocate(); - peers[remote_node].next_request = { remote_node, this_node_, remote_buffer_index, local_buffer_index}; + peers[remote_node].next_request = {remote_node, this_node_, remote_buffer_index, local_buffer_index}; return peers[remote_node].next_request; } @@ -370,7 +344,8 @@ class communicator { return peers[remote_node].next_request; } - // used for async rma data transfers, so they wont take up buffer indices they dont need + // used for rma data transfers, so they wont take up unneeded buffer indices + // only put() and get() use this, copy() offloads an active msg to the data source and therefore uses allocate_request() request allocate_data_request(node_t remote_node) { HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) return { remote_node, this_node_, NO_BUFFER_INDEX, NO_BUFFER_INDEX }; @@ -390,12 
+365,13 @@ class communicator { mpi_peer& peer = peers[req.target_node]; - // set flags to false - // local flag inside large host flag buffer @ peers[host] + // set flag for buffer indices associated with request to false + // local flag is inside the hosts large array of msg_flag_buffers @ peers[host] // index offset computed using target node - size_t offset = constants::MSG_BUFFERS * req.target_node; - volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + req.local_buffer_index]); - *local_flag= FLAG_FALSE; + // TODO: Daniel - figure out access to flag memory + size_t offset = constants::MSG_BUFFERS * req.target_node; // offset msg_flag_buffers to the corresponding nodes region + volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].msg_flag_data.get()[offset + req.local_buffer_index]); // this will point to the beginning of a msg_flag_buffer + *local_flag = FLAG_FALSE; // remote flag on target /* This is done by the target after having reveived the new index to poll on size_t remote_flag = FLAG_FALSE; @@ -455,7 +431,9 @@ class communicator { // ham::util::time::statistics flush(1,0); // ham::util::time::statistics flag_put(1,0); + // compute offset in the hosts window size_t offset = constants::MSG_BUFFERS * this_node_; + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): using msg host-offset (bytes): " << offset*sizeof(msg_buffer) << std::endl; ) // ham::util::time::timer t1; MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_win); MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * (offset + buffer_index), size, MPI_BYTE, peers[node].msg_win); @@ -471,8 +449,8 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) // ham::util::time::timer t3; + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): using flag host-offset (bytes): " << offset*sizeof(cache_line_buffer) << std::endl; ) MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 
0, peers[node].flag_win); - MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * (offset + buffer_index), sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); // flag_put.add(t3); MPI_Win_unlock(node, peers[node].flag_win); @@ -506,43 +484,50 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) - size_t received_flag; + size_t *local_flag; + size_t received_flag = FLAG_FALSE; + + // needed on host to access the memory belonging to the node from which to receive + size_t offset = (this_node_ == host_node_) ? constants::MSG_BUFFERS * node : 0; - /* not needed with get if (this_node_ == host_node_) { local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + buffer_index]); } else { local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); } - */ + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) // pre_poll.add(t1); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << ""/*pre_poll.min().count()*/ << std::endl; ) // ham::util::time::timer t2; + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling at offset (bytes): " << offset * sizeof(cache_line_buffer) << ""/*pre_poll.min().count()*/ << std::endl; ) - // needed on host to access the memory belonging to the node from which to receive - size_t offset = (this_node_ == host_node_) ? 
constants::MSG_BUFFERS * node : 0; while (received_flag == FLAG_FALSE) { MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].flag_win); - MPI_Get(&received_flag, sizeof(size_t), MPI_BYTE, this_node_, offset , sizeof(size_t), MPI_BYTE, peers[this_node_].flag_win) + MPI_Get(&received_flag, sizeof(cache_line_buffer), MPI_BYTE, this_node_, (offset + buffer_index) * sizeof(cache_line_buffer) , sizeof(cache_line_buffer), MPI_BYTE, peers[this_node_].flag_win); MPI_Win_unlock(this_node_, peers[this_node_].flag_win); } // poll on flag for completion // poll.add(t2); HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling took: " << ""/*poll.min().count()*/ << std::endl; ) + // reset the flag (thanks mpi for requiring me to get a lock for that again... + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].flag_win); + *local_flag = FLAG_FALSE; + MPI_Win_unlock(this_node_, peers[this_node_].flag_win); + // make sure message window is updated locally too MPI_Win_lock(MPI_LOCK_SHARED, this_node_, 0, peers[this_node_].msg_win); MPI_Win_unlock(this_node_, peers[this_node_].msg_win); - if (*local_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on - peers[node].next_flag = *local_flag; + if (received_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on + peers[node].next_flag = received_flag; - *local_flag = FLAG_FALSE; + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): done " << ""/*poll.min().count()*/ << std::endl; ) if (this_node_ == host_node_) { size_t offset = constants::MSG_BUFFERS * node; @@ -566,14 +551,16 @@ class communicator { void recv_result(request_reference_type req) { // nothing todo here, since this communicator implementation uses one-sided communication - // the data is already where it is expected (in the buffer referenced in req) + // the data will be written to where it 
is expected // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); return; } + // only used by the host bool test_local_flag(node_t node, size_t buffer_index) { + size_t offset = (constants::MSG_SIZE + constants::FLAG_SIZE) * node; volatile size_t * local_flag = reinterpret_cast(&peers[node].flag_data.get()[buffer_index]); return *local_flag != FLAG_FALSE; } @@ -597,10 +584,8 @@ class communicator { void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_data_win); // not needed since all ranks have locks on all targets MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win); MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_data_win); - // MPI_Win_unlock(remote_dest.node(), peers[remote_dest.node()].rma_data_win); } // to be used by the host only @@ -609,7 +594,6 @@ class communicator { { req.uses_rma_ = true; - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_data_win); MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win, &req.next_mpi_request()); } @@ -620,10 +604,8 @@ class communicator { template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { - // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_data_win); MPI_Get(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win); MPI_Win_flush(remote_source.node(), peers[remote_source.node()].rma_data_win); - // 
MPI_Win_unlock(remote_source.node(), peers[remote_source.node()].rma_data_win); } // to be used by the host @@ -632,7 +614,6 @@ class communicator { { req.uses_rma_ = true; - // MPI_Win_lock(MPI_LOCK_SHARED, remote_source.node(), 0, peers[remote_source.node()].rma_data_win); MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); } @@ -642,12 +623,11 @@ class communicator { T* ptr; //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // attach to own window HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); - /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other - MPI_Win_attach(peers[i].rma_data_win, (void*)ptr, n * sizeof(T)); - } */ + MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called @@ -658,13 +638,10 @@ class communicator { template buffer_ptr allocate_peer_buffer(const size_t n, node_t source_node) { - // TODO DANIEL: this is where mem is allocated that should be mapped to static mpi windows T* ptr; - //int err = posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_); - } // used for data buffers only @@ -704,36 +681,6 @@ class communicator { return instance().node_descriptions[node]; } -/* - // called to check if an rma path between two targets exists, sufficient to call on one of the two targets - bool has_rma_path(node_t target_node) { - // check if copy path exists - return !peers[remote_dest.node()].rma_data_win; - } -*/ -/* - // called to establish an rma path between two targets for copy operations, needs to be called on both sides - 
void establish_rma_path(node_t target_node) { - if(!has_rma_path(target_node)) { // make sure there is not already an rma path - const int members[2]; - // NOTE: protocol for target-target sub-ranks is: lower global rank: 0, higher global rank: 1 - // thus rank for existing copy paths can be easily translated by comparing target rank to own rank - if(this_node_ > target_node) { - members[0] = target_node; - members[1] = this_node_; - } else { - members[0] = this_node_; - members[1] = target_node; - } - MPI_Group pairwise_group; - MPI_Group_incl(global_group, 2, members, &pairwise_group); - MPI_Comm_create_group(MPI_COMM_WORLD, pairwise_group, 0, &(peers[target_node].rma_comm)); - MPI_Group_free(&pairwise_group); // no longer needed after COMM is created - MPI_Win_create_dynamic(MPI_INFO_NULL, peers[target_node].rma_comm, &(peers[target_node].rma_data_win)); - } - } -*/ - private: static communicator* instance_; node_t this_node_; @@ -751,15 +698,11 @@ class communicator { request next_request; size_t next_flag = 0; + // NOTE: behind these buffers are MSG_BUFFERS many buffers of size MSG_SIZE+FLAG_SIZE, indices are managed by buffer_pool - // NOTE: behind these buffers are MSG_BUFFERS many buffers of size MSG_SIZE/CACHE_LINE_SIZE, indices are managed by buffer_pool - - // static window for inbound rma messages - buffer_ptr msg_data; - MPI_Win msg_win; - // static window for inbound message flags - buffer_ptr flag_data; - MPI_Win flag_win; + // static window for inbound rma messages and their flags + buffer_ptr msg_flag_data; + MPI_Win msg_flag_win; // mpi rma dynamic window for data MPI_Win rma_data_win; diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index a315c50..0148e7f 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -270,6 +270,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) comm.send_msg(result.get_request(), (void*)&msg, sizeof msg); 
comm.recv_data_async(result.get_request(), remote_source, local_dest, n); comm.recv_result(result.get_request()); // trigger receiving the result + // TODO(improvement): the recv_result() is not needed, could remove and remove send_result() from offload_read_msg to reduce synchronization overhead return result; #elif defined HAM_COMM_MPI_RMA_DYNAMIC @@ -355,6 +356,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) comm.recv_result(write_result.get_request()); // trigger receiving the msg result // async // synchronise + // TODO(improvement): this is oversynchronized, waiting for the target to complete receiving should be sufficient read_result.get(); write_result.get(); #elif defined HAM_COMM_MPI_RMA_DYNAMIC diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 97c5e95..6e709a4 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -95,6 +95,7 @@ class offload_write_msg communicator::instance().recv_data(buffer_ptr(nullptr, remote_node), local_dest, n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a send operation that has the address. // send a result to tell the sender, that the transfer is done + // TODO(improvement): this may be if (req.valid()) { req.send_result((void*)&n, sizeof n); } @@ -122,6 +123,7 @@ class offload_read_msg communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. 
// send a result message to tell the sender, that the transfer is done + // TODO(improvement): this may be removed along with receiving the result in offload get() if (req.valid()) { req.send_result((void*)&n, sizeof n); } From 1a907970c7d221868f45c2556a570db45f400d22 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sun, 28 Oct 2018 00:18:10 +0200 Subject: [PATCH 069/150] functional version, lots of changes --- include/ham/misc/constants.hpp | 3 +- include/ham/misc/types.hpp | 1 + include/ham/net/communicator.hpp | 3 +- .../ham/net/communicator_mpi_rma_dynamic.hpp | 334 ++++++++---------- 4 files changed, 161 insertions(+), 180 deletions(-) diff --git a/include/ham/misc/constants.hpp b/include/ham/misc/constants.hpp index 113fc8c..b37c690 100644 --- a/include/ham/misc/constants.hpp +++ b/include/ham/misc/constants.hpp @@ -17,7 +17,8 @@ namespace constants { enum net { MSG_SIZE = HAM_MESSAGE_SIZE, MSG_BUFFERS = 256, - FLAG_SIZE = sizeof(size_t), + DATA_PUT_CODE = 1, + DATA_GET_CODE = 2, }; enum arch { diff --git a/include/ham/misc/types.hpp b/include/ham/misc/types.hpp index 458eefc..1b8393d 100644 --- a/include/ham/misc/types.hpp +++ b/include/ham/misc/types.hpp @@ -12,6 +12,7 @@ namespace ham { typedef size_t node_t; // node type, e.g. MPI rank, identifies remote target process +typedef size_t flag_t; // MPI RMA completion flag / buffer index typedef char* msg_buffer_t; // buffer type for messages namespace net { diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index 65683c6..a0a6164 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -42,7 +42,8 @@ namespace net { msg_flag_buffer { // buffer for fully rma backend. 
windows will consist of several of those - char data[constants::MSG_SIZE + constants::FLAG_SIZE]; + char msg[constants::MSG_SIZE]; + flag_t flag; }; node_t this_node(); diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 08d7815..4afd7f5 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -77,40 +77,39 @@ class communicator { request() : valid_(false) {} // instantiate invalid request(node_t target_node, node_t source_node, size_t remote_buffer_index, size_t local_buffer_index) - : target_node(target_node), source_node(source_node), valid_(true), remote_buffer_index(remote_buffer_index), local_buffer_index(local_buffer_index), req_count(0), uses_rma_(false) + : target_node(target_node), source_node(source_node), valid_(true), remote_buffer_index(remote_buffer_index), local_buffer_index(local_buffer_index), req_count(0), data_transfer_type(0) {} // return true if request was finished // will not work as intended for rma ops, no equivalent to test() available for remote completion bool test() { - // int flag = 0; - - // MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received - - /* - if(uses_rma_) - { - HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) + if(data_transfer_type) { // this will be true for rma data transfers + int flag = 0; + MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // test on RGET is what we want, because local completion = full completion for get, but for RPut local is not enough and there is no non-blocking remote-completion test + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma put remote completion" << std::endl; ) + // TODO - Daniel: this is bad but MPI RMA doesn't have anything 
better + // TODO - Daniel: discuss preliminary design decision with Matthias: false positive + longer block = better than false negative as users may poll on this and get stuck + return flag != 0; } - - return flag != 0; - */ return communicator::instance().test_local_flag(target_node, local_buffer_index); } void* get() // blocks { - - HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) - MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard - // for async get from receive_data_async() this will block until get is completed - HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) - - if(uses_rma_) { - // this will only be true for async rma data transfers + if(data_transfer_type) { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // Get will have fully completed + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + if(data_transfer_type == constants::DATA_PUT_CODE) { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Win_flush()" << std::endl; ) + communicator::instance().flush_data(target_node); + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Win_flush()" << std::endl; ) + } + // this will only be true for async rma GETs // there will be no result returned, so this won't poll on anything and return a dummy instead. 
return nullptr; + // TODO - Daniel: this is bad but MPI RMA doesn't have anything better } else { return communicator::instance().recv_msg(target_node, local_buffer_index); } @@ -131,9 +130,9 @@ class communicator { return valid_; } - bool uses_rma() const + bool is_rma_data_transfer() const { - return uses_rma_; + return data_transfer_type; } MPI_Request& next_mpi_request() @@ -146,7 +145,7 @@ class communicator { node_t target_node; node_t source_node; bool valid_; - bool uses_rma_; + short data_transfer_type; // only needed by the sender enum { NUM_REQUESTS = 3 }; @@ -241,7 +240,7 @@ class communicator { peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); // set flags to FLAG_FALSE - reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS * nodes_); // TODO: Daniel - this may be bad if buffer structs are not contiguos - check + reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS * nodes_); // structs are contiguos, this is ok // fill resource pools for managing indices on the host for (size_t j = 0; j < nodes_; ++j) { @@ -262,7 +261,7 @@ class communicator { peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); // set flags to FLAG_FALSE - reset_flags(peers[this_node_].flag_data, constants::MSG_BUFFERS); + reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS); // create window with memory MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); @@ -365,15 +364,16 @@ class communicator { mpi_peer& peer = peers[req.target_node]; - // set flag for buffer indices associated with request to false + // reset local flag // local flag is 
inside the hosts large array of msg_flag_buffers @ peers[host] // index offset computed using target node - // TODO: Daniel - figure out access to flag memory + // as this is an access to rma window memory, we need to lock again... + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); size_t offset = constants::MSG_BUFFERS * req.target_node; // offset msg_flag_buffers to the corresponding nodes region - volatile size_t* local_flag = reinterpret_cast(&peers[host_node_].msg_flag_data.get()[offset + req.local_buffer_index]); // this will point to the beginning of a msg_flag_buffer - *local_flag = FLAG_FALSE; + peers[this_node_].msg_flag_data.get()[offset + req.local_buffer_index].flag = FLAG_FALSE; + MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); // remote flag on target - /* This is done by the target after having reveived the new index to poll on + /* This is done by the target after having received the new index to poll on size_t remote_flag = FLAG_FALSE; MPI_Put(&remote_flag, sizeof(remote_flag), MPI_BYTE, req.target_node, 0, sizeof(remote_flag), MPI_BYTE, peer.flag_win); // flush? 
don't think so @@ -400,31 +400,30 @@ class communicator { // ham::util::time::statistics flag_put(1,0); // ham::util::time::timer t1; - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_win); - MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_win); - // msg_put.add(t1); - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing msg took: " << ""/*msg_put.min().count()*/ << std::endl; ) - - - // TODO DANIEL: because MPI does not guarantee order on RMA ops, there might be a FLUSH necessary here - // unlock includes flush, no need for it here - MPI_Win_unlock(node, peers[node].msg_win); - // ham::util::time::timer t2; - // MPI_Win_flush(node, peers[node].msg_win); - // flush.add(t2); - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) - - // write flag to target flags buffer - // not sure on the size here? 
- // ham::util::time::timer t3; - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].flag_win); - MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(cache_line_buffer) * buffer_index, sizeof(next_buffer_index), MPI_BYTE, peers[node].flag_win); + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_flag_win); + // put msg + MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_flag_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_flag_win); + // put flag + MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(msg_flag_buffer) * buffer_index + constants::MSG_SIZE, sizeof(next_buffer_index), MPI_BYTE, peers[node].msg_flag_win); + // msg_put.add(t1); + MPI_Win_unlock(node, peers[node].msg_flag_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg + flag" << std::endl; ) + + // unlock includes flush, no need for it here + // ham::util::time::timer t2; + // MPI_Win_flush(node, peers[node].msg_win); + // flush.add(t2); + // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) + // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) + + // write flag to target flags buffer + // not sure on the size here? + // ham::util::time::timer t3; + // MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_flag_win); // flag_put.add(t3); - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took: " << ""/*flag_put.min().count()*/ <(&peers[req.target_node].msg_buffers[req.send_buffer_index]); - memcpy(msg_buffer, msg, size); - MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); - */ - - const request& next_req = allocate_next_request(req.target_node); // allocate_next_req needed?? 
+ // this is used by the host + void send_msg(request_reference_type req, void* msg, size_t size) { + const request& next_req = allocate_next_request(req.target_node); // this is only required for the host send_msg(req.target_node, req.remote_buffer_index, next_req.remote_buffer_index, msg, size); } // make private?! // called by function below - void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) - { - // ham::util::time::statistics pre_poll(1,0); - // ham::util::time::statistics poll(1,0); - // ham::util::time::timer t1; - buffer_index = buffer_index == NO_BUFFER_INDEX ? peers[node].next_flag : buffer_index; - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) + void* recv_msg(node_t node, size_t buffer_index = NO_BUFFER_INDEX, void* msg = nullptr, size_t size = constants::MSG_SIZE) { + buffer_index = buffer_index == NO_BUFFER_INDEX ? peers[node].next_flag : buffer_index; + HAM_DEBUG(HAM_LOG << "communicator::recv_msg(): remote node is: " << node << std::endl; ) + HAM_DEBUG(HAM_LOG << "communicator::recv_msg(): using buffer index: " << buffer_index << std::endl; ) - size_t *local_flag; - size_t received_flag = FLAG_FALSE; + // size_t *local_flag; + flag_t received_flag = FLAG_FALSE; // needed on host to access the memory belonging to the node from which to receive size_t offset = (this_node_ == host_node_) ? 
constants::MSG_BUFFERS * node : 0; - if (this_node_ == host_node_) { - local_flag = reinterpret_cast(&peers[host_node_].flag_data.get()[offset + buffer_index]); - } else { - local_flag = reinterpret_cast(&peers[this_node_].flag_data.get()[buffer_index]); - } - - - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << (int)*local_flag << std::endl; ) - // pre_poll.add(t1); - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): pre-polling took: " << ""/*pre_poll.min().count()*/ << std::endl; ) - // ham::util::time::timer t2; - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling at offset (bytes): " << offset * sizeof(cache_line_buffer) << ""/*pre_poll.min().count()*/ << std::endl; ) - - - - while (received_flag == FLAG_FALSE) { - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].flag_win); - MPI_Get(&received_flag, sizeof(cache_line_buffer), MPI_BYTE, this_node_, (offset + buffer_index) * sizeof(cache_line_buffer) , sizeof(cache_line_buffer), MPI_BYTE, peers[this_node_].flag_win); - MPI_Win_unlock(this_node_, peers[this_node_].flag_win); - } // poll on flag for completion - // poll.add(t2); - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): FLAG after polling: " << (int)*local_flag << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): polling took: " << ""/*poll.min().count()*/ << std::endl; ) - - // reset the flag (thanks mpi for requiring me to get a lock for that again... 
- MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].flag_win); - *local_flag = FLAG_FALSE; - MPI_Win_unlock(this_node_, peers[this_node_].flag_win); - - // make sure message window is updated locally too - MPI_Win_lock(MPI_LOCK_SHARED, this_node_, 0, peers[this_node_].msg_win); - MPI_Win_unlock(this_node_, peers[this_node_].msg_win); - - if (received_flag != NO_BUFFER_INDEX) // the flag contains the next buffer index to poll on - peers[node].next_flag = received_flag; + HAM_DEBUG(HAM_LOG << "communicator::recv_msg(): FLAG before polling: " << peers[this_node_].msg_flag_data.get()[offset + buffer_index].flag << std::endl; ) + HAM_DEBUG(HAM_LOG << "communicator::recv_msg(): polling at offset (bytes): " + << (offset + buffer_index) * sizeof(msg_flag_buffer) << std::endl; ) + + while (received_flag == FLAG_FALSE) { + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); + MPI_Get(&received_flag, sizeof(flag_t), MPI_BYTE, this_node_, + sizeof(msg_flag_buffer) * (offset + buffer_index) + constants::MSG_SIZE, sizeof(flag_t), + MPI_BYTE, peers[this_node_].msg_flag_win); + // using a get here, by standard just accessing the memory should be okay too, like below + // received_flag = peers[this_node_].msg_flag_data.get()[offset + buffer_index].flag); + MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); + } // poll on flag for completion + HAM_DEBUG(HAM_LOG << "communicator::recv_msg(): FLAG after polling: " + << peers[this_node_].msg_flag_data.get()[offset + buffer_index].flag << std::endl; ) + + // reset the flag + // this is weird: theoretically this reset does not need to be visible publicly, as no other process ever reads the flags -> could leave out the locking + // however, if in the separate model the local wincopy has an update that is not updated to the public copy, behaviour when the target locks again for polling is undefined + // might overwrite local change with old flag still valid in public window (which will 
undo resetting the flag and result in infinite re-execution of the last AM until host writes new flag to public win) + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); + peers[this_node_].msg_flag_data.get()[offset + buffer_index].flag = FLAG_FALSE; // offset==0 for non-hosts + MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); + + if (received_flag != NO_BUFFER_INDEX) { // the flag contains the next buffer index to poll on + peers[node].next_flag = received_flag; + } - HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): done " << ""/*poll.min().count()*/ << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::recv_msg(): done " << std::endl; ) - if (this_node_ == host_node_) { - size_t offset = constants::MSG_BUFFERS * node; - return &peers[host_node_].msg_data.get()[offset + buffer_index]; - } else { - return &peers[this_node_].msg_data.get()[buffer_index]; - } + return &peers[this_node_].msg_flag_data.get()[offset + buffer_index]; // offset==0 for non-hosts } // to be used by the offload target's main loop: synchronously receive one message at a time @@ -544,45 +519,48 @@ class communicator { /* static msg_buffer buffer; // NOTE ! 
MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); return static_cast(&buffer); */ - return recv_msg(host_node_, NO_BUFFER_INDEX, msg, size); + return static_cast(recv_msg(host_node_, NO_BUFFER_INDEX, msg, size)); } - // trigger receiving the result of a message on the sending side + // trigger asynchronously receiving the result of a message on the sending side void recv_result(request_reference_type req) { - // nothing todo here, since this communicator implementation uses one-sided communication + // nothing to do here, since this communicator implementation uses one-sided communication // the data will be written to where it is expected - - // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); - return; + HAM_DEBUG( HAM_LOG << "communicator::recv_result(): This does nothing with the MPI RMA communicator" << std::endl; ) + return; } - // only used by the host - bool test_local_flag(node_t node, size_t buffer_index) - { - size_t offset = (constants::MSG_SIZE + constants::FLAG_SIZE) * node; - volatile size_t * local_flag = reinterpret_cast(&peers[node].flag_data.get()[buffer_index]); - return *local_flag != FLAG_FALSE; + // only used by the host through request.test() (top of this file) called by future.test() (offload.hpp) + bool test_local_flag(node_t node, size_t buffer_index) { + size_t offset = constants::MSG_BUFFERS * node; + flag_t temp_flag = FLAG_FALSE; + // public window flag changes may not have been updated in the local window... 
so we need to lock again here + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); + temp_flag = peers[node].msg_flag_data.get()[offset + buffer_index].flag; // NOTE(review): recv_msg() reads this flag via peers[this_node_].msg_flag_data with the same offset; confirm peers[node] is the intended buffer here + MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); + return temp_flag != FLAG_FALSE; } - void reset_flags(buffer_ptr flags, size_t size) - { - cache_line_buffer fill_value; - cache_line_buffer* fill_value_ptr = &fill_value; - // null fill_value - std::fill(reinterpret_cast(fill_value_ptr), reinterpret_cast(fill_value_ptr) + sizeof(cache_line_buffer), 0); - // set to flag false - *reinterpret_cast(fill_value_ptr) = FLAG_FALSE; - // set all flags to fill_value - std::fill(flags.get(), flags.get() + size, fill_value); + void flush_data(node_t node) { + MPI_Win_flush(node, peers[node].rma_data_win); + } + + // this is only called @ communicator construction to initialize flags with FLAG_FALSE + // calling this at any other point may reset flags belonging to messages that have not yet been executed (and never will be then) + void reset_flags(buffer_ptr msg_flags, size_t size) { + // now this is where a struct of arrays would have been cooler... 
+ // TODO - Daniel: Ask Matthias if he knows a cooler solution + for (size_t i = 0; i < size; ++i) { // size is the number of entries, so valid indices are [0, size) + msg_flags.get()[i].flag = FLAG_FALSE; + } } // in MPI RMA backend only used by copy // host uses async version // targets don't send data to host as host uses rma get template - void send_data(T* local_source, buffer_ptr remote_dest, size_t size) - { + void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win); MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_data_win); @@ -590,10 +568,8 @@ class communicator { // to be used by the host only template - void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) - { - req.uses_rma_ = true; - + void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { + req.data_transfer_type = constants::DATA_PUT_CODE; MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win, &req.next_mpi_request()); } @@ -610,20 +586,18 @@ class communicator { // to be used by the host template - void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) - { - req.uses_rma_ = true; - - MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); + void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { + req.data_transfer_type = constants::DATA_GET_CODE; + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, 
peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); } template buffer_ptr allocate_buffer(const size_t n, node_t source_node) { T* ptr; - //int err = - posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + MPI_Alloc_mem(n * sizeof(T), MPI_INFO_NULL, &ptr); // attach to own window HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << std::endl; ) MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); @@ -639,7 +613,9 @@ class communicator { buffer_ptr allocate_peer_buffer(const size_t n, node_t source_node) { T* ptr; - posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // if you revert to memalign, also change back free in free_peer_buffer() + // using MPI_Alloc instead as these buffers are used for RMA accesses + MPI_Alloc_mem(n * sizeof(T), MPI_INFO_NULL, &ptr); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_); } @@ -656,18 +632,19 @@ class communicator { /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other MPI_Win_detach(peers[i].rma_data_win, ptr.get()); } */ - free(static_cast(ptr.get())); + // free(static_cast(ptr.get())); // switch back to this if you revert back from using MPI_alloc_mem() + MPI_Free_mem(ptr.get()); } // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers template void free_peer_buffer(buffer_ptr ptr) { - // TODO DANIEL: this is where mem is freed that should be mapped to static mpi windows - // i dont think this is ever called on the actual memory mapped to static mpi windows, freeing it would equal "disconnecting" corresponding target + // this will never be called on the 
actual memory mapped to static mpi windows, freeing it would equal "disconnecting" the corresponding target assert(ptr.node() == this_node_); // NOTE: no dtor is called - free(static_cast(ptr.get())); + // free(static_cast(ptr.get())); // switch back to this if you revert back from using MPI_alloc_mem() + MPI_Free_mem(ptr.get()); } static communicator& instance() { return *instance_; } @@ -689,7 +666,8 @@ class communicator { std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions struct mpi_peer { - buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // buffers used for MPI_RPut and RGet + + // buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // not needed for RMA version, host-side RMA window is used instead // needed by sender to manage which buffers are in use and which are free // just manages indices, that can be used by From a7ad374b8cabd0a3a831e64ca91e9e9dc34ae7d4 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sun, 28 Oct 2018 16:59:10 +0100 Subject: [PATCH 070/150] added CMake --- CMakeLists.txt | 48 ++ src/CMakeLists.txt | 83 +++ src/ham/CMakeLists.txt | 70 +++ thirdparty/bmt/AUTHORS.md | 4 + thirdparty/bmt/CMakeLists.txt | 26 + thirdparty/bmt/LICENSE_1_0.txt | 23 + thirdparty/bmt/README.md | 21 + thirdparty/bmt/build/CMakeCache.txt | 278 +++++++++ .../CMakeFiles/3.5.2/CMakeCXXCompiler.cmake | 68 +++ .../3.5.2/CMakeDetermineCompilerABI_CXX.bin | Bin 0 -> 12552 bytes .../build/CMakeFiles/3.5.2/CMakeSystem.cmake | 15 + .../CompilerIdCXX/CMakeCXXCompilerId.cpp | 533 ++++++++++++++++++ .../CMakeFiles/3.5.2/CompilerIdCXX/a.out | Bin 0 -> 12704 bytes .../CMakeDirectoryInformation.cmake | 16 + .../bmt/build/CMakeFiles/CMakeOutput.log | 339 +++++++++++ .../bmt/build/CMakeFiles/Makefile.cmake | 95 ++++ thirdparty/bmt/build/CMakeFiles/Makefile2 | 126 +++++ .../build/CMakeFiles/TargetDirectories.txt | 5 + 
.../bmt/build/CMakeFiles/cmake.check_cache | 1 + .../bmt/build/CMakeFiles/feature_tests.bin | Bin 0 -> 16600 bytes .../bmt/build/CMakeFiles/feature_tests.cxx | 405 +++++++++++++ .../bmt/build/CMakeFiles/progress.marks | 1 + thirdparty/bmt/build/Makefile | 148 +++++ thirdparty/bmt/build/cmake_install.cmake | 50 ++ thirdparty/bmt/build/example | Bin 0 -> 72600 bytes .../CMakeDirectoryInformation.cmake | 16 + .../CMakeFiles/example.dir/CXX.includecache | 36 ++ .../CMakeFiles/example.dir/DependInfo.cmake | 21 + .../src/CMakeFiles/example.dir/build.make | 113 ++++ .../CMakeFiles/example.dir/cmake_clean.cmake | 10 + .../CMakeFiles/example.dir/depend.internal | 6 + .../src/CMakeFiles/example.dir/depend.make | 6 + .../src/CMakeFiles/example.dir/example.cpp.o | Bin 0 -> 87416 bytes .../src/CMakeFiles/example.dir/flags.make | 10 + .../build/src/CMakeFiles/example.dir/link.txt | 1 + .../src/CMakeFiles/example.dir/progress.make | 3 + .../bmt/build/src/CMakeFiles/progress.marks | 1 + thirdparty/bmt/build/src/Makefile | 180 ++++++ thirdparty/bmt/build/src/cmake_install.cmake | 34 ++ thirdparty/bmt/include/noma/bmt/bmt.hpp | 257 +++++++++ thirdparty/bmt/src/CMakeLists.txt | 13 + thirdparty/bmt/src/example.cpp | 58 ++ 42 files changed, 3120 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 src/CMakeLists.txt create mode 100644 src/ham/CMakeLists.txt create mode 100644 thirdparty/bmt/AUTHORS.md create mode 100644 thirdparty/bmt/CMakeLists.txt create mode 100644 thirdparty/bmt/LICENSE_1_0.txt create mode 100644 thirdparty/bmt/README.md create mode 100644 thirdparty/bmt/build/CMakeCache.txt create mode 100644 thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeCXXCompiler.cmake create mode 100755 thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeDetermineCompilerABI_CXX.bin create mode 100644 thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeSystem.cmake create mode 100644 thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/CMakeCXXCompilerId.cpp create mode 100755 
thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/a.out create mode 100644 thirdparty/bmt/build/CMakeFiles/CMakeDirectoryInformation.cmake create mode 100644 thirdparty/bmt/build/CMakeFiles/CMakeOutput.log create mode 100644 thirdparty/bmt/build/CMakeFiles/Makefile.cmake create mode 100644 thirdparty/bmt/build/CMakeFiles/Makefile2 create mode 100644 thirdparty/bmt/build/CMakeFiles/TargetDirectories.txt create mode 100644 thirdparty/bmt/build/CMakeFiles/cmake.check_cache create mode 100755 thirdparty/bmt/build/CMakeFiles/feature_tests.bin create mode 100644 thirdparty/bmt/build/CMakeFiles/feature_tests.cxx create mode 100644 thirdparty/bmt/build/CMakeFiles/progress.marks create mode 100644 thirdparty/bmt/build/Makefile create mode 100644 thirdparty/bmt/build/cmake_install.cmake create mode 100755 thirdparty/bmt/build/example create mode 100644 thirdparty/bmt/build/src/CMakeFiles/CMakeDirectoryInformation.cmake create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/CXX.includecache create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/DependInfo.cmake create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/build.make create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/cmake_clean.cmake create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.internal create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.make create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/example.cpp.o create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/flags.make create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/link.txt create mode 100644 thirdparty/bmt/build/src/CMakeFiles/example.dir/progress.make create mode 100644 thirdparty/bmt/build/src/CMakeFiles/progress.marks create mode 100644 thirdparty/bmt/build/src/Makefile create mode 100644 thirdparty/bmt/build/src/cmake_install.cmake create mode 100644 thirdparty/bmt/include/noma/bmt/bmt.hpp create mode 100644 
thirdparty/bmt/src/CMakeLists.txt create mode 100644 thirdparty/bmt/src/example.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..cf05180 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright (c) 2018 Marcel Ehrhardt +# Copyright (c) 2018 Matthias Noack +# +# See accompanying file LICENSE and README for further information. + +cmake_minimum_required(VERSION 3.2 FATAL_ERROR) # TODO verify +project(ham LANGUAGES CXX) + +# set output directory +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +message(STATUS "CMAKE_BINARY_DIR: " ${CMAKE_BINARY_DIR}) + +### thirdparty dependencies + +# Boost +find_package(Boost 1.40 COMPONENTS program_options REQUIRED) +add_library(boost_library INTERFACE) +target_include_directories (boost_library INTERFACE ${Boost_INCLUDE_DIRS}) +target_link_libraries (boost_library INTERFACE ${Boost_LIBRARIES}) + +# MPI +find_package(MPI) # not required +if (MPI_FOUND) + add_library (mpi_library INTERFACE) + target_include_directories (mpi_library INTERFACE ${MPI_CXX_INCLUDE_PATH}) + target_compile_options (mpi_library INTERFACE ${MPI_CXX_COMPILE_FLAGS}) + target_link_libraries (mpi_library INTERFACE ${MPI_CXX_LIBRARIES}) +endif () + +# Intel SCIF (for Xeon Phi accelerators with KNC architecture) +find_file(SCIF_HEADER_FILE "scif.h") +if (SCIF_HEADER_FILE) + set(SCIF_FOUND ON) + get_filename_component(SCIF_INCLUDE_DIR "${SCIF_HEADER_FILE}" DIRECTORY) + message(STATUS "Found SCIF: ${SCIF_HEADER_FILE}") + + add_library (scif_library INTERFACE) + target_include_directories (scif_library INTERFACE ${SCIF_INCLUDE_DIR}) +else () + message(STATUS "Could NOT find SCIF (missing: scif.h)") +endif () + +# tell the compiler to be strict +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -hstd=c++11") +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAM_DEBUG_ON") + +add_subdirectory(thirdparty/bmt ${CMAKE_CURRENT_BINARY_DIR}/build.noma_bmt) +add_subdirectory(src) diff --git a/src/CMakeLists.txt 
b/src/CMakeLists.txt new file mode 100644 index 0000000..b34c36e --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,83 @@ +# Copyright (c) 2018 Marcel Ehrhardt +# Copyright (c) 2018 Matthias Noack +# +# See accompanying file LICENSE and README for further information. + +project(ham_exe LANGUAGES CXX) +cmake_minimum_required(VERSION 3.2 FATAL_ERROR) # TODO verfify + +add_subdirectory(ham) + +### Benchmarks + +## Explicit targets (not built by default) +# Intel LEO offload directive benchmark, requires Intel compiler +if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + add_executable(benchmark_intel_leo EXCLUDE_FROM_ALL benchmark_intel_leo.cpp) + target_link_libraries(benchmark_intel_leo ham_interface) +endif () + +if (MPI_FOUND) + add_executable (benchmark_ham_offload_mpi benchmark_ham_offload.cpp) + target_link_libraries (benchmark_ham_offload_mpi ham_offload_mpi) + + add_executable (benchmark_ham_offload_mpi_rma_dynamic benchmark_ham_offload.cpp) + target_link_libraries (benchmark_ham_offload_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) +endif() + +if (SCIF_FOUND) + add_executable (benchmark_ham_offload_scif benchmark_ham_offload.cpp) + target_link_libraries (benchmark_ham_offload_scif ham_offload_scif) +endif() + +### Examples/Tests + +# some tests for the active msg layer +add_executable(active_msgs active_msgs.cpp) +target_link_libraries(active_msgs ham_interface) + +if (MPI_FOUND) + add_executable(ham_offload_test_mpi ham_offload.cpp) + target_link_libraries(ham_offload_test_mpi ham_offload_mpi) + + add_executable(ham_offload_test_explicit_mpi ham_offload_explicit.cpp) + target_link_libraries(ham_offload_test_explicit_mpi ham_offload_mpi_explicit) + + add_executable(inner_product_mpi inner_product.cpp) + target_link_libraries(inner_product_mpi ham_offload_mpi) + + add_executable(test_data_transfer_mpi test_data_transfer.cpp) + target_link_libraries(test_data_transfer_mpi ham_offload_mpi) + + add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) + 
target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) + + add_executable(ham_offload_test_mpi_rma_dynamic ham_offload.cpp) + target_link_libraries(ham_offload_test_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) + + add_executable(inner_product_mpi_rma_dynamic inner_product.cpp) + target_link_libraries(inner_product_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) + + add_executable(test_data_transfer_mpi_rma_dynamic test_data_transfer.cpp) + target_link_libraries(test_data_transfer_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) + + add_executable(test_argument_transfer_mpi_rma_dynamic test_argument_transfer.cpp) + target_link_libraries(test_argument_transfer_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) +endif() + +if (SCIF_FOUND) + add_executable(ham_offload_test_scif ham_offload.cpp) + target_link_libraries(ham_offload_test_scif ham_offload_scif) + + add_executable(ham_offload_test_explicit_scif ham_offload_explicit.cpp) + target_link_libraries(ham_offload_test_explicit_scif ham_offload_scif_explicit) + + add_executable(inner_product_scif inner_product.cpp) + target_link_libraries(inner_product_scif ham_offload_scif) + + add_executable(test_data_transfer_scif test_data_transfer.cpp) + target_link_libraries(test_data_transfer_scif ham_offload_scif) + + add_executable(test_argument_transfer_scif test_argument_transfer.cpp) + target_link_libraries(test_argument_transfer_scif ham_offload_scif) +endif() diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt new file mode 100644 index 0000000..278d452 --- /dev/null +++ b/src/ham/CMakeLists.txt @@ -0,0 +1,70 @@ +# Copyright (c) 2018 Marcel Ehrhardt +# Copyright (c) 2018 Matthias Noack +# +# See accompanying file LICENSE and README for further information. 
+ +project(ham_lib LANGUAGES CXX) +cmake_minimum_required(VERSION 3.2 FATAL_ERROR) # TODO verfify + +# interface target for ham +add_library(ham_interface INTERFACE) +target_compile_features(ham_interface INTERFACE cxx_auto_type cxx_range_for cxx_variadic_templates) +target_link_libraries(ham_interface INTERFACE noma_bmt boost_library) +target_include_directories(ham_interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/../../include) + +set(HAM_LIB_SRC + net/communicator.cpp + net/communicator_mpi.cpp + net/communicator_mpi_rma_dynamic.cpp + offload/runtime.cpp + offload/offload.cpp + util/cpu_affinity.cpp) + +if (MPI_FOUND) + add_library(ham_offload_mpi # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main.cpp + net/communicator_mpi.cpp) + target_compile_definitions(ham_offload_mpi PUBLIC -DHAM_COMM_MPI=1) + target_link_libraries(ham_offload_mpi PUBLIC ham_interface mpi_library) + + add_library(ham_offload_mpi_explicit # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main_explicit.cpp + net/communicator_mpi.cpp) + target_compile_definitions(ham_offload_mpi_explicit PUBLIC -DHAM_COMM_MPI=1 -DHAM_EXPLICIT=1) + target_link_libraries(ham_offload_mpi_explicit PUBLIC ham_interface mpi_library) + + add_library(ham_offload_mpi_rma_dynamic # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main.cpp + net/communicator_mpi_rma_dynamic.cpp) + target_compile_definitions(ham_offload_mpi_rma_dynamic PUBLIC -DHAM_COMM_MPI_RMA_DYNAMIC=1) + target_link_libraries(ham_offload_mpi_rma_dynamic PUBLIC ham_interface mpi_library) + + set_target_properties(ham_offload_mpi ham_offload_mpi_explicit ham_offload_mpi_rma_dynamic PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) +endif () + +if (SCIF_FOUND) + add_library(ham_offload_scif # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main.cpp + net/communicator_scif.cpp) + target_compile_definitions(ham_offload_scif PUBLIC -DHAM_COMM_SCIF=1) + 
target_link_libraries(ham_offload_scif PUBLIC ham_interface scif_library) + + add_library(ham_offload_scif_explicit # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main_explicit.cpp + net/communicator_scif.cpp) + target_compile_definitions(ham_offload_scif_explicit PUBLIC -DHAM_COMM_SCIF=1 -DHAM_EXPLICIT=1) + target_link_libraries(ham_offload_scif_explicit PUBLIC ham_interface scif_library) + + set_target_properties(ham_offload_scif ham_offload_scif_explicit PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) +endif () diff --git a/thirdparty/bmt/AUTHORS.md b/thirdparty/bmt/AUTHORS.md new file mode 100644 index 0000000..96e8fa0 --- /dev/null +++ b/thirdparty/bmt/AUTHORS.md @@ -0,0 +1,4 @@ +# Original Author + +Matthias Noack + diff --git a/thirdparty/bmt/CMakeLists.txt b/thirdparty/bmt/CMakeLists.txt new file mode 100644 index 0000000..464c511 --- /dev/null +++ b/thirdparty/bmt/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (c) 2017 Matthias Noack +# +# See accompanying file LICENSE and README for further information. 
+ +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) + +project(libnoma_bmt LANGUAGES CXX) + + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# header only library +add_library(noma_bmt INTERFACE) +# NOTE: we want to use '#include "noma/bmt/bmt.hpp"', not '#include "bmt.hpp"' +target_include_directories(noma_bmt INTERFACE include) +target_compile_features(noma_bmt INTERFACE ) + +#set_target_properties(noma_bmt PROPERTIES +# CXX_STANDARD 11 +# CXX_STANDARD_REQUIRED YES +# CXX_EXTENSIONS NO +#) + +add_subdirectory(src) diff --git a/thirdparty/bmt/LICENSE_1_0.txt b/thirdparty/bmt/LICENSE_1_0.txt new file mode 100644 index 0000000..36b7cd9 --- /dev/null +++ b/thirdparty/bmt/LICENSE_1_0.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/thirdparty/bmt/README.md b/thirdparty/bmt/README.md new file mode 100644 index 0000000..1c53432 --- /dev/null +++ b/thirdparty/bmt/README.md @@ -0,0 +1,21 @@ +# Benchmark Timer Library + +A simple C++11 header-only library that provides a `timer` and a `statistics` class for benchmarking. + +See `src/example.cpp` for usage. + +## Building and Running the example + +Building: + +```bash +mkdir build +cd build +cmake -DNOMA_BMT_BUILD_EXAMPLES=TRUE .. +make +``` + +```bash +./example +``` + diff --git a/thirdparty/bmt/build/CMakeCache.txt b/thirdparty/bmt/build/CMakeCache.txt new file mode 100644 index 0000000..9b797af --- /dev/null +++ b/thirdparty/bmt/build/CMakeCache.txt @@ -0,0 +1,278 @@ +# This is the CMakeCache file. +# For build in directory: /home/bemdeppi/ham/thirdparty/bmt/build +# It was generated by CMake: /usr/bin/cmake +# You can edit this file to change values found and used by cmake. +# If you do not want to change any of the values, simply exit the editor. +# If you do want to change a value, simply edit, save, and exit the editor. +# The syntax for the file is as follows: +# KEY:TYPE=VALUE +# KEY is the name of a variable in the cache. +# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!. +# VALUE is the current value for the KEY. + +######################## +# EXTERNAL cache entries +######################## + +//Path to a program. +CMAKE_AR:FILEPATH=/usr/bin/ar + +//Choose the type of build, options are: None(CMAKE_CXX_FLAGS or +// CMAKE_C_FLAGS used) Debug Release RelWithDebInfo MinSizeRel. +CMAKE_BUILD_TYPE:STRING= + +//Enable/Disable color output during build. 
+CMAKE_COLOR_MAKEFILE:BOOL=ON + +//CXX compiler +CMAKE_CXX_COMPILER:FILEPATH=/usr/bin/c++ + +//Flags used by the compiler during all build types. +CMAKE_CXX_FLAGS:STRING= + +//Flags used by the compiler during debug builds. +CMAKE_CXX_FLAGS_DEBUG:STRING=-g + +//Flags used by the compiler during release builds for minimum +// size. +CMAKE_CXX_FLAGS_MINSIZEREL:STRING=-Os -DNDEBUG + +//Flags used by the compiler during release builds. +CMAKE_CXX_FLAGS_RELEASE:STRING=-O3 -DNDEBUG + +//Flags used by the compiler during release builds with debug info. +CMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=-O2 -g -DNDEBUG + +//Flags used by the linker. +CMAKE_EXE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_EXE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_EXE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Enable/Disable output of compile commands during generation. +CMAKE_EXPORT_COMPILE_COMMANDS:BOOL=OFF + +//Install path prefix, prepended onto install directories. +CMAKE_INSTALL_PREFIX:PATH=/usr/local + +//Path to a program. +CMAKE_LINKER:FILEPATH=/usr/bin/ld + +//Path to a program. +CMAKE_MAKE_PROGRAM:FILEPATH=/usr/bin/gmake + +//Flags used by the linker during the creation of modules. +CMAKE_MODULE_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_MODULE_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_MODULE_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_NM:FILEPATH=/usr/bin/nm + +//Path to a program. 
+CMAKE_OBJCOPY:FILEPATH=/usr/bin/objcopy + +//Path to a program. +CMAKE_OBJDUMP:FILEPATH=/usr/bin/objdump + +//Value Computed by CMake +CMAKE_PROJECT_NAME:STATIC=libnoma_bmt + +//Path to a program. +CMAKE_RANLIB:FILEPATH=/usr/bin/ranlib + +//Flags used by the linker during the creation of dll's. +CMAKE_SHARED_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_SHARED_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_SHARED_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//If set, runtime paths are not added when installing shared libraries, +// but are added when building. +CMAKE_SKIP_INSTALL_RPATH:BOOL=NO + +//If set, runtime paths are not added when using shared libraries. +CMAKE_SKIP_RPATH:BOOL=NO + +//Flags used by the linker during the creation of static libraries. +CMAKE_STATIC_LINKER_FLAGS:STRING= + +//Flags used by the linker during debug builds. +CMAKE_STATIC_LINKER_FLAGS_DEBUG:STRING= + +//Flags used by the linker during release minsize builds. +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL:STRING= + +//Flags used by the linker during release builds. +CMAKE_STATIC_LINKER_FLAGS_RELEASE:STRING= + +//Flags used by the linker during Release with Debug Info builds. +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO:STRING= + +//Path to a program. +CMAKE_STRIP:FILEPATH=/usr/bin/strip + +//If this value is on, makefiles will be generated without the +// .SILENT directive, and all commands will be echoed to the console +// during the make. This is useful for debugging only. With Visual +// Studio IDE projects all commands are done without /nologo. +CMAKE_VERBOSE_MAKEFILE:BOOL=FALSE + +//No help, variable specified on the command line. 
+NOMA_BMT_BUILD_EXAMPLES:UNINITIALIZED=TRUE + +//Value Computed by CMake +libnoma_bmt_BINARY_DIR:STATIC=/home/bemdeppi/ham/thirdparty/bmt/build + +//Value Computed by CMake +libnoma_bmt_SOURCE_DIR:STATIC=/home/bemdeppi/ham/thirdparty/bmt + + +######################## +# INTERNAL cache entries +######################## + +//ADVANCED property for variable: CMAKE_AR +CMAKE_AR-ADVANCED:INTERNAL=1 +//This is the directory where this CMakeCache.txt was created +CMAKE_CACHEFILE_DIR:INTERNAL=/home/bemdeppi/ham/thirdparty/bmt/build +//Major version of cmake used to create the current loaded cache +CMAKE_CACHE_MAJOR_VERSION:INTERNAL=3 +//Minor version of cmake used to create the current loaded cache +CMAKE_CACHE_MINOR_VERSION:INTERNAL=5 +//Patch version of cmake used to create the current loaded cache +CMAKE_CACHE_PATCH_VERSION:INTERNAL=2 +//ADVANCED property for variable: CMAKE_COLOR_MAKEFILE +CMAKE_COLOR_MAKEFILE-ADVANCED:INTERNAL=1 +//Path to CMake executable. +CMAKE_COMMAND:INTERNAL=/usr/bin/cmake +//Path to cpack program executable. +CMAKE_CPACK_COMMAND:INTERNAL=/usr/bin/cpack +//Path to ctest program executable. +CMAKE_CTEST_COMMAND:INTERNAL=/usr/bin/ctest +//ADVANCED property for variable: CMAKE_CXX_COMPILER +CMAKE_CXX_COMPILER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS +CMAKE_CXX_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_DEBUG +CMAKE_CXX_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_MINSIZEREL +CMAKE_CXX_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELEASE +CMAKE_CXX_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_CXX_FLAGS_RELWITHDEBINFO +CMAKE_CXX_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//Path to cache edit program executable. 
+CMAKE_EDIT_COMMAND:INTERNAL=/usr/bin/ccmake +//Executable file format +CMAKE_EXECUTABLE_FORMAT:INTERNAL=ELF +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS +CMAKE_EXE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_DEBUG +CMAKE_EXE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_MINSIZEREL +CMAKE_EXE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELEASE +CMAKE_EXE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_EXPORT_COMPILE_COMMANDS +CMAKE_EXPORT_COMPILE_COMMANDS-ADVANCED:INTERNAL=1 +//Name of external makefile project generator. +CMAKE_EXTRA_GENERATOR:INTERNAL= +//Name of generator. +CMAKE_GENERATOR:INTERNAL=Unix Makefiles +//Name of generator platform. +CMAKE_GENERATOR_PLATFORM:INTERNAL= +//Name of generator toolset. +CMAKE_GENERATOR_TOOLSET:INTERNAL= +//Source directory with the top level CMakeLists.txt file for this +// project +CMAKE_HOME_DIRECTORY:INTERNAL=/home/bemdeppi/ham/thirdparty/bmt +//Install .so files without execute permission. 
+CMAKE_INSTALL_SO_NO_EXE:INTERNAL=0 +//ADVANCED property for variable: CMAKE_LINKER +CMAKE_LINKER-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MAKE_PROGRAM +CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS +CMAKE_MODULE_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_DEBUG +CMAKE_MODULE_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL +CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELEASE +CMAKE_MODULE_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_NM +CMAKE_NM-ADVANCED:INTERNAL=1 +//number of local generators +CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=2 +//ADVANCED property for variable: CMAKE_OBJCOPY +CMAKE_OBJCOPY-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_OBJDUMP +CMAKE_OBJDUMP-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_RANLIB +CMAKE_RANLIB-ADVANCED:INTERNAL=1 +//Path to CMake installation. 
+CMAKE_ROOT:INTERNAL=/usr/share/cmake +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS +CMAKE_SHARED_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_DEBUG +CMAKE_SHARED_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL +CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELEASE +CMAKE_SHARED_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_INSTALL_RPATH +CMAKE_SKIP_INSTALL_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_SKIP_RPATH +CMAKE_SKIP_RPATH-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS +CMAKE_STATIC_LINKER_FLAGS-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_DEBUG +CMAKE_STATIC_LINKER_FLAGS_DEBUG-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL +CMAKE_STATIC_LINKER_FLAGS_MINSIZEREL-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELEASE +CMAKE_STATIC_LINKER_FLAGS_RELEASE-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO +CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO-ADVANCED:INTERNAL=1 +//ADVANCED property for variable: CMAKE_STRIP +CMAKE_STRIP-ADVANCED:INTERNAL=1 +//uname command +CMAKE_UNAME:INTERNAL=/usr/bin/uname +//ADVANCED property for variable: CMAKE_VERBOSE_MAKEFILE +CMAKE_VERBOSE_MAKEFILE-ADVANCED:INTERNAL=1 + diff --git a/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeCXXCompiler.cmake b/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeCXXCompiler.cmake new file mode 100644 index 0000000..eadb4d9 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeCXXCompiler.cmake @@ -0,0 +1,68 @@ 
+set(CMAKE_CXX_COMPILER "/usr/bin/c++") +set(CMAKE_CXX_COMPILER_ARG1 "") +set(CMAKE_CXX_COMPILER_ID "GNU") +set(CMAKE_CXX_COMPILER_VERSION "4.8.5") +set(CMAKE_CXX_COMPILER_WRAPPER "") +set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "98") +set(CMAKE_CXX_COMPILE_FEATURES "cxx_template_template_parameters;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX98_COMPILE_FEATURES "cxx_template_template_parameters") +set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX14_COMPILE_FEATURES "") + +set(CMAKE_CXX_PLATFORM_ID "Linux") +set(CMAKE_CXX_SIMULATE_ID "") +set(CMAKE_CXX_SIMULATE_VERSION "") + +set(CMAKE_AR "/usr/bin/ar") +set(CMAKE_RANLIB "/usr/bin/ranlib") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_COMPILER_IS_GNUCXX 1) +set(CMAKE_CXX_COMPILER_LOADED 1) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_CXX_ABI_COMPILED TRUE) +set(CMAKE_COMPILER_IS_MINGW ) +set(CMAKE_COMPILER_IS_CYGWIN ) +if(CMAKE_COMPILER_IS_CYGWIN) + set(CYGWIN 1) + set(UNIX 1) +endif() + +set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +if(CMAKE_COMPILER_IS_MINGW) + set(MINGW 1) +endif() +set(CMAKE_CXX_COMPILER_ID_RUN 1) +set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;mm;CPP) +set(CMAKE_CXX_LINKER_PREFERENCE 30) +set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) + +# Save compiler ABI information. 
+set(CMAKE_CXX_SIZEOF_DATA_PTR "8") +set(CMAKE_CXX_COMPILER_ABI "ELF") +set(CMAKE_CXX_LIBRARY_ARCHITECTURE "") + +if(CMAKE_CXX_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_CXX_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +endif() + +if(CMAKE_CXX_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "") +endif() + +set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + +set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;c") +set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib64/gcc/x86_64-suse-linux/4.8;/usr/lib64;/lib64;/usr/x86_64-suse-linux/lib") +set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff --git a/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeDetermineCompilerABI_CXX.bin b/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeDetermineCompilerABI_CXX.bin new file mode 100755 index 0000000000000000000000000000000000000000..246feb2541717c984b88541f0c946b911aefb404 GIT binary patch literal 12552 zcmeHNeQ;FQb-(YuU1^2HY9&M>z%^?d46(B-@nI}O1iKP~6%ii>$tJ<#vD$souD#lo z_X7*pv1zb9Rvi+Wc4{Wgho>Ktw&Tn=(@aYgi-k;<)1^{X@kj1sm6KOc^>g zR)6Qddsc7Xt~z);{UbBJJMW&4d(OG%e!TncxpyDy8`z?2n&701je@8^eMGYCT!`2T zNh)5KXb=&xLM#(?;CS&wWC=kVV|Z1WX6RFL2A~H|mu@q<^oT?l-e9u@L-vp$)i;|W z3bo-4^K6o#5JAZxJ@WPJd`SWxkO;#A%8!(StVbm2T~K-#lpe!?l4r>6qqfmAsLBWR z2q>Z>1jJ^U&q9)=wCj8^N%&=D=&~F0^iKxR494mhC3{B)fITkgSQun-lrmhKG1me(Sj< zS7VR*etYT{|NPc3|8_98Y5v7r3(8vYP~twV9Curv)@B{i~1k z_Pu(kY5g;et4DtI_H&cy zTSb#NQ5R7h8+=3Zo5l0WzMB5x+mas;-&6du65fMCF`2micAER^glUeA=dxy@7|$0? 
zQ<&Qhn!ByBbfIYFdo%Gu!72!|e{j2*wDO8QxV<-%%UXl+k&I=R)ht0h<8~Fm#u5p$ zK-4M$D#29~uBsl7r?YTIauS7-IhxLBb}Xc6>Cp}=KU z7Sa|;i>(9wJ-z1Y@apg#+y{0aX?Wji{cI^J`2(Odk13TFr}3KT;$52*SDOz=%>CZB zt9eB4nsW__9l7L-J?*`tAt+)`oGf}Ttbsu6$d7%erI~UzbH~jPnOVLJdGpc;Frrgb zesOvRY58w`K{0xa6;{37KlRi7v8i*hBX9g>*I@sdlQa`zXHLioVXJX4#h$=?_x);`eRe)AJT_UpCEhW*yZ6Dsc2@pdFf%|ogj+kPhB-2ADjA7 z?9`i^V%pi*Pp_7s{CyIPJ@GRr$EIGg_0K8&!`+AAuPC()LG!|+aAf!;?d#9;= zu_N8SItUbREW6M)^x`zWT9BVPJACo?q0+omTr2>Rjj1nOicOWK&-A@dwLaZ3vYny*~QtNx4Ip-am_lHr}<0LPl_eQ5?qWAPqU5O6$PrVu)95}js(i=n*kA?{D z8Ti!sRlkazx@ORbUFiE@W+wK;+r=AMe_-m(fvGE-VPmH4-(p8jYq7iDD!oYqxo4&Y{sF|E7=J$1W4GYPPkEHiu zvonnE!V+UcC#1c2E?t|Mc?9qVI5q&-fNuFqzyZJ?1HJ&*gbxXQuPA8`?-tr*KwHvO z?>nZ|2Z&F5lz)Pq+ekpA+VTQBCR;uQ`5?*q16%y<+nO66@*NhN7H_z1&9a+`O*S{< z8HQhS&5HU1PwKt%>kgt~Rh9%ijB>dK_V@!|(fj=EUpD&u!K0o&|H?0UWB$%3>tg<{ zBlX+;k&M49>hFyDSN8aWJ^psc_xOFXUC+VJU&9Wsbs#@ZANRn=J@7l&183Df=XnLW z30T5pa>KNXUvSa%&8F03gYte!g+t~2&>~gN@zfGU(^^Gou?@s)Dt@B%jS`o8Y*HN8 zBNF?s4`y1<@0P(TvL$u_3esYs33##UIL$EO1$>5Jg?uh z@1XQa8;ED?A`)Z%Q;OzxuzrJ*XBx|^Y)?wbi_S6<5+h^SLX0z zYj5xR;L1Hid-{R{6!?8AxHjB1yA>3lh9SEz?H+OO>)rLxJkUi6Irm_t#Ajsrd*yQ8 zuk(2Fe%w>3R;Fx!_Mw|?`y4mq%Mz7{UEklK3AX%CR6p?iJArKwC8j0e$Q}f)+AoKe z+rM7DkKu4f;ZPxIO%ys1GMfmYWvCF|BRW#KajPRfQc7o%9r=lI8|fM9AK2V6M*H{> zHr=Z`u$iZzIT+m&1icOsfOhy{RO*qRa-a4PaP1M)JpklB@<#JfAgJs0K?0t7a$Wby z39M^%G|_e4y9U7Uc+=S;%DfTiL!_U8R!8r$ryH5pK8d`+;GoRA6{JYw(Zz1$KJ5rG zkgA)R$QRQet(AlD#v^OQ8}hb4BQ#9t!aSjOs_}aegim_{ImJRX$1nTQ#!qVzfw~s0 zRWp1`e9L_cXdY@U+H(JbM#I0vAHWi;FB6T88$n_+Dip7wXRR>&>l-&T_Al79NXLrm zy;bxtklL^~R9D*`YN~hJ{6yIhtj<>84Qrbg*;TAqM0%0-XL=TCsG`nW*3jW!{j4YG zEsW=?}k%@TC2Zdf;*sn4(2n1@ZaKXe9aXx0}%%7By-| zsH|!I3&?)UO=xTDLT!a~3H?UOZL_XyTJ#xweiu5;5Cfg-?w-Bi>Vje)P5U4JeB|=z z7rts$BQ6B5OejVpbzaJhjhYb%_uR5^`!d5HEFGNOrz5)8ZjrWsua<2=Cko*NnGpx@ z=%NGp^|+u`4dnqZ{>CP4pw!)=dtn2P@i)Nu7oq7M@V+MN_yP8tiA zvW4_m)=CCxNiP$LR6H*Vsa(ETDPXfm)OAMmA-_t!_I4l`8ZVZzR`-~dwesl%$ap@H z>b4JwgQ2ltXy@u+2>sbTmMz_NS1>dR*mW2B8mGw?$avvUHj&Ebvbjl*MV+7^*L(K)VeUGlkH2 
zE@@@DX^_+LOvswF5<>PL^#z?H6c0^U5;2Ry${w5$@%-39$`VyaW{%lnnq<+8=kxJH z>JW{LLB+wBr)sVvI{LK#Zgt|W{$6zL99>B-3Z>OHz%(9Wl(r3&G+{EzUty1UPaFD{ zU0j{B0Zj}tI-;aBv7b?=t-vCF{i5Bu@6x`>ovKYpOL zEoH^)Kn#p^mmaB-H1I(ftjRY-u_oVBjuV~64D8A|UaaKTmE*)petkKftmOMdtvJlU z%?`(JmGbih$AOjn#&Wz^$*;b5pmGB@SktPXE9J2ts*gBX5;q+8CeA_wr|dj$D*3bb zDV6-z^8I2Ze}P!p5OK03ZXnd)INw=lh=t|&zLNh5K_}4eWFV@p6@MC-&3qr=)G-8R zcc7M?>ia3uGiuxi*?fvRtCJyU_fnki(y7LZ^E)*Sw-wkN6WVepvCJckwq%`?L3Ma-RYoEq7Xt z`9Ps6*+1*D^Q4roo>zYbey#TYK+3lWx~QPU=K-$($3|Pwbp<87IWkQnuVrVhetQe@ znphy(=S6_crvC<7t3OGimVK}6km~oV9el0Ed=5)~_5CS@bsA35R=Vc@R%ySj>b={m z`0n@g0cofDy_=6ii(2=yMaa|rsXO#+fqbod+s7dvq|R`qKc0jCsI*_bUVU5gT}D8@ zG>4sY;Mck@es>P}M)+UrUb|hji}&`Ha$%*EpPlFI*&f)hbq~H)*>~SZZ&3E#_t7^? zevR(}BA`OCG&+j#$uzh1?luSd_YC4@dG-R?ERLHAxVzpnjvc>!%F`AE$TV}E}K7I%)4ytksOQOBo;}2N9 zd-wLDqIAZ}M|=9iiHQkeetuxpx^#9lXF6`=h}$KZxRG;4Sf;sUcXWH7*|%de-JHwD zGLCJr*%wm+vCX@MxpiP?PjtZCxn;|qzCm*^+B48cO1Mog8%P#%W-6Xd(!F}6({%Uk z*oezOSD2eWw4)3;TmA(QUjBN69C`Tu?8UwJ7{{|>164}cGGT`zCy z8Sc>ozoCqzIl65B9DbV>Q z>+^n^|9=*A3Iq4J=vAPd`n>NhDuc^h@^1YY%Ds5F{^NQCl)ye+0`Vs{kpG<`)IMFG z^@H{iS8xZy`n-?tG;BpW$);3igR;IN#SS2J=fnEPjEF2cru3gz9Y$>=lmm|+wuV0K9o_n029NrL?Q5Seob98yXD>VzeAV$pX=v%bNV{< ze+^xlYOK$3DgW=6|KF>oLwP>&I2?D?&++S3Y7~;&U#1zp0~x3PSRo=ERCQ^dD%m73c=dnOvl}V-EfQRDtQQT?#HzM3ug~ z|9*}OB}z~8!eEo-zG6EAN?#S1fvFZ=>K|uvk>cJt^be~)a-4N419$g7b+fCBrN;Df vThVe==pto%TuqdUyVS;KmwNGT1-Bay&qtnDv`p2i|Jhrl84+FoWtfX literal 0 HcmV?d00001 diff --git a/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeSystem.cmake b/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeSystem.cmake new file mode 100644 index 0000000..e822e95 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/3.5.2/CMakeSystem.cmake @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Linux-4.4.73-5.1_4.0.141-cray_ari_s") +set(CMAKE_HOST_SYSTEM_NAME "Linux") +set(CMAKE_HOST_SYSTEM_VERSION "4.4.73-5.1_4.0.141-cray_ari_s") +set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + + + +set(CMAKE_SYSTEM "Linux-4.4.73-5.1_4.0.141-cray_ari_s") +set(CMAKE_SYSTEM_NAME "Linux") 
+set(CMAKE_SYSTEM_VERSION "4.4.73-5.1_4.0.141-cray_ari_s") +set(CMAKE_SYSTEM_PROCESSOR "x86_64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) diff --git a/thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/CMakeCXXCompilerId.cpp b/thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/CMakeCXXCompilerId.cpp new file mode 100644 index 0000000..e6d8536 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/CMakeCXXCompilerId.cpp @@ -0,0 +1,533 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + /* __COMO_VERSION__ = VRR */ +# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) +# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif + /* __INTEL_COMPILER = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define 
COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" +# if __SUNPRO_CC >= 0x5100 + /* __SUNPRO_CC = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# endif + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + /* __HP_aCC = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 
100) +# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + /* __DECCXX_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 10000) + +#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800 +# define COMPILER_ID "XL" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__FUJITSU) || defined(__FCC_VERSION) 
|| defined(__fcc_version) +# define COMPILER_ID "Fujitsu" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || 
defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__ ) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" + +#elif defined(__ARMCC_VERSION) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(_SGI_COMPILER_VERSION) || defined(_COMPILER_VERSION) +# define COMPILER_ID "MIPSpro" +# if defined(_SGI_COMPILER_VERSION) + /* _SGI_COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_SGI_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_SGI_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_SGI_COMPILER_VERSION % 10) +# else + /* _COMPILER_VERSION = VRP */ +# define COMPILER_VERSION_MAJOR DEC(_COMPILER_VERSION/100) +# define COMPILER_VERSION_MINOR DEC(_COMPILER_VERSION/10 % 10) +# define COMPILER_VERSION_PATCH DEC(_COMPILER_VERSION % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. 
*/ +#elif defined(__sgi) +# define COMPILER_ID "MIPSpro" + +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXE) || defined(__CRAYXC) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__sgi) || defined(__sgi__) || defined(_SGI) +# define PLATFORM_ID "IRIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif 
defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# else /* unknown platform */ +# define PLATFORM_ID "" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID "" + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#else +# define ARCHITECTURE_ID "" +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number components. 
*/ +#ifdef COMPILER_VERSION_MAJOR +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + + +const char* info_language_dialect_default = "INFO" ":" "dialect_default[" +#if __cplusplus >= 201402L + "14" +#elif __cplusplus >= 201103L + "11" +#else + "98" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXE) || defined(__CRAYXC) + require += info_cray[argc]; +#endif + require += info_language_dialect_default[argc]; + (void)argv; + return require; +} diff --git a/thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/a.out b/thirdparty/bmt/build/CMakeFiles/3.5.2/CompilerIdCXX/a.out new file mode 100755 index 0000000000000000000000000000000000000000..2ae39adc6aed23d4eb9598ae92fa7c71a5ff3dc9 GIT binary patch literal 12704 zcmeHNeQ+Dcb>F)KNl28yPdSw6ScoD;N^F3XL@SgWS^z&JK&2!~ByGhqbpnASfEphR zaF8iCj+HR>gjJm?owm`W_ECv|GN%2>`HZ4{@ONv8cnb>mca?TI^b zJxb!3`Mte;AaDRX@_713XS5jX``Gv1?!MdG-M4#>jEwHnHBE5R#odCa%bt*AIk^y5 zwn|d*`o$^{5?jS)(Fjfh{vlaH(7_m9SEdhC=vdIq6YbFRzs(;7N%vJfqt26$M$3NYcBi^sXvBhOJ7TA@`5^M*j&_ zKB0#|5gj5RhGo7CNtV*sYj%QgyGfFOA&D^TbJ&6*>m7j}wev<(s}ek_>RZV#y{i2T z`8o7t($l;8dNPSXCY>wI1?Kv91$OlXi}~OV*>AE>vilEB%9`kTbD}nP;!iv(|60pG z9eZTcPmaXj`ql&Ai#&BA{rBJZQ5p4zs;_?Dj$N>&KKGyM;y;z$Xrgxd>j-aLfqxeK z_B!%ASCBuv0$;#?MLQ9{{dV%%E@7I7s2FhBC2Y(JBfv zI&siUSOvwNI5?Ea=d6j?bjEVZ>Xx9MtWyQBWIS#biP}!}$GK|4Rn^&8Iw$szMhAz? 
z9l;&JJGrZB7{JGx_#&7*eo7nCtLP~`sRXaLoRk`?4l!kTXScuHO>5ZIi z$Ts^$nFG2Zx zBp7-06(~m*UUKwbRr+TJe!ki-M5%2OnpeMohD^QG@Dfwl%@ugtPvu8q>X3CPi^(Eh%x;S$#d~bMRDSS_K;ob0Lbm6DriP6Wme5J_` zmpmRII6nHx*S7y_Jm3QweC87pq%g`0_$GWPM^{ZqBpg=38Eo2RTo zF`dsH?{!jNP$B({+(|~XZsV?AK$_R?_}$Xd=K=2r90iQRVNU}-5BLKr$75cD7m4Oe zO4>t5g*Ml!b+2jip3|CIiBD^!7y6$f0hQ{@3#{0V{4?(@E%`~-*SgQwb)cpBLGKx{ zcf(zu+PQfXvB~Bz{!ha0=ZF#ZwLYc~t!>mZsM#q?fc-YgEh-=Mwf?C-;_LdVG2-(- z?iulI`;&%7Xx^@%4v&yQ*$zLmL$RGZjD4`a+4zJq{_(>miB#&-+VxLSEKG1j;eMoZ-;4zoc|k4-pWnWiG_u zD%h-GmxAjQWKiCNVmn8P*B+MV^^^9ils@7Bky8C<{+Ob2OP(`D|&W?c3e@<`F*2n zFNYE04#+ybc09)C43&EqX;5V}PIOU1&K|BT72-3p{93u3$0t8;9-lnEsrH+Fo+fI#?v(>v*Ba?X(DjC$0EVX_ zowHHa5Q08Lq6D-?de=Pz$h59GreUb(8 zge0kis%!jgnLH1&=`V@A=Ut%Q5VD`*zbY-Xd;5U=0{_(yD|a<(llmi~S)0(mfc%L5 zFbdbiRpA({h_&9=W&SJV%JDuj)ACKK!@Qtmc{0pz3#d0nL$V!*j>_Z$OP7ZjUn+HZYpk;|iDv{kEl;zIDu zgkp43=c&xNTQgdNgCD>9;AX?;FP)k@uH)25`?$3I(R#K;I+wPCaWW%L;;)My=#rHe0K7o;-|;LNnUE?MBWv+Wk&G5HNBFjFtoU zW5(5gNZF-7^L|~|(Kh_U-j zizv)mg`^dTWio+yK8Hb?vmI`+7z00J<&t)4pcjILd?|;XBvHMc0_`?rXNrMrK4E1B z=po}kC}7Q5aUuOjz9^<2vh35dmPAZjSh-WPB34MAqAZRxU3svI71<>H%F$vUS?21Rmd|NrI0vFH$7yMik*)MeVCpe806O_W(Cqr3`x#5Ha5 zDOFwr*nlP`7@aQLI>BhAt%)13b%br5EZdr5w9?j*8e2P+LcfBTV4q_Ap0crVg}DIY z1euu4QyjnOJh$;n&TG<{8MxcS?f8b;zj8airM4nv z#T!5j%wqj|s7BJjbU0C$ZwR|C-&2kwD~%b5B44g6-&l?#tL2*n$D7rBuc#NN8MqbY z_^w)hwHQ6N8o|%Gn!i?DsB6b37VGj`%J;a{c3R7Edo{mJ@YcPW-(J3l ztmb!!XPQElEQuQkHTWE$vd|Fg1jql?{96RwaJZ9!sJmW#YKShu_e_;KhQR17pTwP)a+P%~!L)=!&1ji5?SK#|s;Pcy9U2KR=)eJ=$g7#F^=@pP3 z@!w0;Tx_OR;I9E+6Se-Qc|sR0*h`0iEhmcgh~Mkt2NmCa?lcU(CYJpy_bK3wOMdNm zpiq_U^EnnsnI1!~)frcR1Ae{!J}>3lL`aQKp1uEh1^Jcy^K)sZL!6hf8wiyshOFl| zDz4X_Yf`WFJ!(Y7TAguwt8`Rn&HEBieE0hml6Gp}msi2}i`wx|p?f?0d_m3A95Vh= z@p&`P>jK5Je%XiWjKbu4{BiKxaZAkW6Km1QLpu`{kiyP-cD@UK8-9zbogSt0(hBld zz;CV7-?zw)zM zit*1~KBVMBF8O1M-|OOUll|%z$6W21k@jo*l?9*LN%!59Z>M#m@cCBpIt2d14hGPscK5!p;|pW~?+P=o&O**;XRB zYv;~S*B3Hr7hB%n+J)0%rVz_oW}=kMo`y;VRlOA^(V>H}2dtrE$A;8>ZZr{$&&~?- zzR~SF(z%&D-@jJxwD%ni9~?194h_?H6|!rLWZd0B 
zGMKQYOG)VAIpFnDBzckX*z|P4I>i)4!HmOajArtv$1$ZW$#?(5WVGI6>cN`qB;KF% zcYC)gJ_C}iOteF3!k_hdAJ5;#`74Ei^Zd^2?LaE^dB1NfgPU1ULAQPc{098F{_}bW zlt6qSfjs{wtkEIMw+w7&82oo}H^cfIKfJ6M+<&*eGA9lJp}Q#7zp95o(Y=)f@-US! z>LdA%*B_48C=N65XMK)G=7@>J_gxO<*kGLC3H%YWmD|s8%O=%9+W%4F_H#W9A3z!1 zT`|q^O|R1TlQNRK{SH?M=jvg({Tv64DShrI*YEECmtlxR2bDp-e@oTTr#-7%|Eu7U zU)lZ{PY9Gid?f)^?CPZ0W#$5~O8pB;|AG?qmn9vfRnZLp9CW4rqS9Yf`rKYNGQtqveJKrj8`PCmsx)eq_TeAk3Fp%!uOOc@9sa1FVg4dzt|X(qKiuZJCa(?{L84s zzKy(6pTA?}?_2v^^6vUyaOrcr`3wy>q<0)p&fWg|AxeDstJkF*m-6?nZh1FNw>5}@ z%K99?{!CqfyXD>VzeAV$&-HVhd*uf8e+gZ3HS2Rc%-#Ci;`qpVV&<8Z&y@2$`!l=YbXzT$EJIbI)A`fhou zt1=m25YJr?>+}4>I;_u-zP)3dWeCoy`rU=hX4&qQ^wq&!H+M`axa-=wg8tb~M}cnc zDwB&8cdnrCyT#F>c&{?KND)^0ZvVZ5i!4fKd17$LvOCz$sM1%(W$=iac*#GN$wi8L zSJ1zpJ}|jlsSMohe{!?S#Zu#;PdJK}t3nqk`{U}ORGd`{|9SP|k-0ti^LXTOMe}RD P`n{W_8BuT ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [-export-dynamic] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-o] ==> ignore + arg [cmTC_d6736] ==> ignore + arg [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64/crt1.o] ==> ignore + arg [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64/crti.o] ==> ignore + arg [/usr/lib64/gcc/x86_64-suse-linux/4.8/crtbegin.o] ==> ignore + arg [-L/usr/lib64/gcc/x86_64-suse-linux/4.8] ==> dir [/usr/lib64/gcc/x86_64-suse-linux/4.8] + arg [-L/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64] ==> dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64] + arg [-L/lib/../lib64] ==> dir [/lib/../lib64] + arg [-L/usr/lib/../lib64] ==> dir [/usr/lib/../lib64] + arg [-L/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../x86_64-suse-linux/lib] ==> dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../x86_64-suse-linux/lib] + arg [-L/usr/lib64/gcc/x86_64-suse-linux/4.8/../../..] ==> dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../..] 
+ arg [CMakeFiles/cmTC_d6736.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore + arg [-lstdc++] ==> lib [stdc++] + arg [-lm] ==> lib [m] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [-lc] ==> lib [c] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [/usr/lib64/gcc/x86_64-suse-linux/4.8/crtend.o] ==> ignore + arg [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64/crtn.o] ==> ignore + remove lib [gcc_s] + remove lib [gcc] + remove lib [gcc_s] + remove lib [gcc] + collapse library dir [/usr/lib64/gcc/x86_64-suse-linux/4.8] ==> [/usr/lib64/gcc/x86_64-suse-linux/4.8] + collapse library dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../lib64] ==> [/usr/lib64] + collapse library dir [/lib/../lib64] ==> [/lib64] + collapse library dir [/usr/lib/../lib64] ==> [/usr/lib64] + collapse library dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../../../x86_64-suse-linux/lib] ==> [/usr/x86_64-suse-linux/lib] + collapse library dir [/usr/lib64/gcc/x86_64-suse-linux/4.8/../../..] 
==> [/usr/lib64] + implicit libs: [stdc++;m;c] + implicit dirs: [/usr/lib64/gcc/x86_64-suse-linux/4.8;/usr/lib64;/lib64;/usr/x86_64-suse-linux/lib] + implicit fwks: [] + + + + +Detecting CXX [-std=c++1y] compiler features compiled with the following output: +Change Dir: /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/gmake" "cmTC_a580f/fast" +/usr/bin/gmake -f CMakeFiles/cmTC_a580f.dir/build.make CMakeFiles/cmTC_a580f.dir/build +gmake[1]: Entering directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_a580f.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++1y -o CMakeFiles/cmTC_a580f.dir/feature_tests.cxx.o -c /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_a580f +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_a580f.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_a580f.dir/feature_tests.cxx.o -o cmTC_a580f -rdynamic +gmake[1]: Leaving directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:1cxx_alias_templates + Feature record: CXX_FEATURE:1cxx_alignas + Feature record: CXX_FEATURE:1cxx_alignof + Feature record: CXX_FEATURE:1cxx_attributes + Feature record: CXX_FEATURE:0cxx_attribute_deprecated + Feature record: CXX_FEATURE:1cxx_auto_type + Feature record: CXX_FEATURE:0cxx_binary_literals + Feature record: CXX_FEATURE:1cxx_constexpr + Feature record: CXX_FEATURE:0cxx_contextual_conversions + Feature record: CXX_FEATURE:1cxx_decltype + Feature record: CXX_FEATURE:0cxx_decltype_auto + Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types + Feature record: CXX_FEATURE:1cxx_default_function_template_args + Feature record: CXX_FEATURE:1cxx_defaulted_functions + Feature record: CXX_FEATURE:1cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:1cxx_delegating_constructors + Feature 
record: CXX_FEATURE:1cxx_deleted_functions + Feature record: CXX_FEATURE:0cxx_digit_separators + Feature record: CXX_FEATURE:1cxx_enum_forward_declarations + Feature record: CXX_FEATURE:1cxx_explicit_conversions + Feature record: CXX_FEATURE:1cxx_extended_friend_declarations + Feature record: CXX_FEATURE:1cxx_extern_templates + Feature record: CXX_FEATURE:1cxx_final + Feature record: CXX_FEATURE:1cxx_func_identifier + Feature record: CXX_FEATURE:1cxx_generalized_initializers + Feature record: CXX_FEATURE:0cxx_generic_lambdas + Feature record: CXX_FEATURE:1cxx_inheriting_constructors + Feature record: CXX_FEATURE:1cxx_inline_namespaces + Feature record: CXX_FEATURE:1cxx_lambdas + Feature record: CXX_FEATURE:0cxx_lambda_init_captures + Feature record: CXX_FEATURE:1cxx_local_type_template_args + Feature record: CXX_FEATURE:1cxx_long_long_type + Feature record: CXX_FEATURE:1cxx_noexcept + Feature record: CXX_FEATURE:1cxx_nonstatic_member_init + Feature record: CXX_FEATURE:1cxx_nullptr + Feature record: CXX_FEATURE:1cxx_override + Feature record: CXX_FEATURE:1cxx_range_for + Feature record: CXX_FEATURE:1cxx_raw_string_literals + Feature record: CXX_FEATURE:1cxx_reference_qualified_functions + Feature record: CXX_FEATURE:0cxx_relaxed_constexpr + Feature record: CXX_FEATURE:0cxx_return_type_deduction + Feature record: CXX_FEATURE:1cxx_right_angle_brackets + Feature record: CXX_FEATURE:1cxx_rvalue_references + Feature record: CXX_FEATURE:1cxx_sizeof_member + Feature record: CXX_FEATURE:1cxx_static_assert + Feature record: CXX_FEATURE:1cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:1cxx_thread_local + Feature record: CXX_FEATURE:1cxx_trailing_return_types + Feature record: CXX_FEATURE:1cxx_unicode_literals + Feature record: CXX_FEATURE:1cxx_uniform_initialization + Feature record: CXX_FEATURE:1cxx_unrestricted_unions + Feature record: CXX_FEATURE:1cxx_user_literals + Feature record: 
CXX_FEATURE:0cxx_variable_templates + Feature record: CXX_FEATURE:1cxx_variadic_macros + Feature record: CXX_FEATURE:1cxx_variadic_templates + + +Detecting CXX [-std=c++11] compiler features compiled with the following output: +Change Dir: /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/gmake" "cmTC_83717/fast" +/usr/bin/gmake -f CMakeFiles/cmTC_83717.dir/build.make CMakeFiles/cmTC_83717.dir/build +gmake[1]: Entering directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_83717.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++11 -o CMakeFiles/cmTC_83717.dir/feature_tests.cxx.o -c /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_83717 +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_83717.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_83717.dir/feature_tests.cxx.o -o cmTC_83717 -rdynamic +gmake[1]: Leaving directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:1cxx_alias_templates + Feature record: CXX_FEATURE:1cxx_alignas + Feature record: CXX_FEATURE:1cxx_alignof + Feature record: CXX_FEATURE:1cxx_attributes + Feature record: CXX_FEATURE:0cxx_attribute_deprecated + Feature record: CXX_FEATURE:1cxx_auto_type + Feature record: CXX_FEATURE:0cxx_binary_literals + Feature record: CXX_FEATURE:1cxx_constexpr + Feature record: CXX_FEATURE:0cxx_contextual_conversions + Feature record: CXX_FEATURE:1cxx_decltype + Feature record: CXX_FEATURE:0cxx_decltype_auto + Feature record: CXX_FEATURE:1cxx_decltype_incomplete_return_types + Feature record: CXX_FEATURE:1cxx_default_function_template_args + Feature record: CXX_FEATURE:1cxx_defaulted_functions + Feature record: CXX_FEATURE:1cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:1cxx_delegating_constructors + Feature record: 
CXX_FEATURE:1cxx_deleted_functions + Feature record: CXX_FEATURE:0cxx_digit_separators + Feature record: CXX_FEATURE:1cxx_enum_forward_declarations + Feature record: CXX_FEATURE:1cxx_explicit_conversions + Feature record: CXX_FEATURE:1cxx_extended_friend_declarations + Feature record: CXX_FEATURE:1cxx_extern_templates + Feature record: CXX_FEATURE:1cxx_final + Feature record: CXX_FEATURE:1cxx_func_identifier + Feature record: CXX_FEATURE:1cxx_generalized_initializers + Feature record: CXX_FEATURE:0cxx_generic_lambdas + Feature record: CXX_FEATURE:1cxx_inheriting_constructors + Feature record: CXX_FEATURE:1cxx_inline_namespaces + Feature record: CXX_FEATURE:1cxx_lambdas + Feature record: CXX_FEATURE:0cxx_lambda_init_captures + Feature record: CXX_FEATURE:1cxx_local_type_template_args + Feature record: CXX_FEATURE:1cxx_long_long_type + Feature record: CXX_FEATURE:1cxx_noexcept + Feature record: CXX_FEATURE:1cxx_nonstatic_member_init + Feature record: CXX_FEATURE:1cxx_nullptr + Feature record: CXX_FEATURE:1cxx_override + Feature record: CXX_FEATURE:1cxx_range_for + Feature record: CXX_FEATURE:1cxx_raw_string_literals + Feature record: CXX_FEATURE:1cxx_reference_qualified_functions + Feature record: CXX_FEATURE:0cxx_relaxed_constexpr + Feature record: CXX_FEATURE:0cxx_return_type_deduction + Feature record: CXX_FEATURE:1cxx_right_angle_brackets + Feature record: CXX_FEATURE:1cxx_rvalue_references + Feature record: CXX_FEATURE:1cxx_sizeof_member + Feature record: CXX_FEATURE:1cxx_static_assert + Feature record: CXX_FEATURE:1cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:1cxx_thread_local + Feature record: CXX_FEATURE:1cxx_trailing_return_types + Feature record: CXX_FEATURE:1cxx_unicode_literals + Feature record: CXX_FEATURE:1cxx_uniform_initialization + Feature record: CXX_FEATURE:1cxx_unrestricted_unions + Feature record: CXX_FEATURE:1cxx_user_literals + Feature record: CXX_FEATURE:0cxx_variable_templates 
+ Feature record: CXX_FEATURE:1cxx_variadic_macros + Feature record: CXX_FEATURE:1cxx_variadic_templates + + +Detecting CXX [-std=c++98] compiler features compiled with the following output: +Change Dir: /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp + +Run Build Command:"/usr/bin/gmake" "cmTC_1dbbe/fast" +/usr/bin/gmake -f CMakeFiles/cmTC_1dbbe.dir/build.make CMakeFiles/cmTC_1dbbe.dir/build +gmake[1]: Entering directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' +Building CXX object CMakeFiles/cmTC_1dbbe.dir/feature_tests.cxx.o +/usr/bin/c++ -std=c++98 -o CMakeFiles/cmTC_1dbbe.dir/feature_tests.cxx.o -c /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx +Linking CXX executable cmTC_1dbbe +/usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_1dbbe.dir/link.txt --verbose=1 +/usr/bin/c++ CMakeFiles/cmTC_1dbbe.dir/feature_tests.cxx.o -o cmTC_1dbbe -rdynamic +gmake[1]: Leaving directory '/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/CMakeTmp' + + + Feature record: CXX_FEATURE:0cxx_aggregate_default_initializers + Feature record: CXX_FEATURE:0cxx_alias_templates + Feature record: CXX_FEATURE:0cxx_alignas + Feature record: CXX_FEATURE:0cxx_alignof + Feature record: CXX_FEATURE:0cxx_attributes + Feature record: CXX_FEATURE:0cxx_attribute_deprecated + Feature record: CXX_FEATURE:0cxx_auto_type + Feature record: CXX_FEATURE:0cxx_binary_literals + Feature record: CXX_FEATURE:0cxx_constexpr + Feature record: CXX_FEATURE:0cxx_contextual_conversions + Feature record: CXX_FEATURE:0cxx_decltype + Feature record: CXX_FEATURE:0cxx_decltype_auto + Feature record: CXX_FEATURE:0cxx_decltype_incomplete_return_types + Feature record: CXX_FEATURE:0cxx_default_function_template_args + Feature record: CXX_FEATURE:0cxx_defaulted_functions + Feature record: CXX_FEATURE:0cxx_defaulted_move_initializers + Feature record: CXX_FEATURE:0cxx_delegating_constructors + Feature record: CXX_FEATURE:0cxx_deleted_functions + Feature record: 
CXX_FEATURE:0cxx_digit_separators + Feature record: CXX_FEATURE:0cxx_enum_forward_declarations + Feature record: CXX_FEATURE:0cxx_explicit_conversions + Feature record: CXX_FEATURE:0cxx_extended_friend_declarations + Feature record: CXX_FEATURE:0cxx_extern_templates + Feature record: CXX_FEATURE:0cxx_final + Feature record: CXX_FEATURE:0cxx_func_identifier + Feature record: CXX_FEATURE:0cxx_generalized_initializers + Feature record: CXX_FEATURE:0cxx_generic_lambdas + Feature record: CXX_FEATURE:0cxx_inheriting_constructors + Feature record: CXX_FEATURE:0cxx_inline_namespaces + Feature record: CXX_FEATURE:0cxx_lambdas + Feature record: CXX_FEATURE:0cxx_lambda_init_captures + Feature record: CXX_FEATURE:0cxx_local_type_template_args + Feature record: CXX_FEATURE:0cxx_long_long_type + Feature record: CXX_FEATURE:0cxx_noexcept + Feature record: CXX_FEATURE:0cxx_nonstatic_member_init + Feature record: CXX_FEATURE:0cxx_nullptr + Feature record: CXX_FEATURE:0cxx_override + Feature record: CXX_FEATURE:0cxx_range_for + Feature record: CXX_FEATURE:0cxx_raw_string_literals + Feature record: CXX_FEATURE:0cxx_reference_qualified_functions + Feature record: CXX_FEATURE:0cxx_relaxed_constexpr + Feature record: CXX_FEATURE:0cxx_return_type_deduction + Feature record: CXX_FEATURE:0cxx_right_angle_brackets + Feature record: CXX_FEATURE:0cxx_rvalue_references + Feature record: CXX_FEATURE:0cxx_sizeof_member + Feature record: CXX_FEATURE:0cxx_static_assert + Feature record: CXX_FEATURE:0cxx_strong_enums + Feature record: CXX_FEATURE:1cxx_template_template_parameters + Feature record: CXX_FEATURE:0cxx_thread_local + Feature record: CXX_FEATURE:0cxx_trailing_return_types + Feature record: CXX_FEATURE:0cxx_unicode_literals + Feature record: CXX_FEATURE:0cxx_uniform_initialization + Feature record: CXX_FEATURE:0cxx_unrestricted_unions + Feature record: CXX_FEATURE:0cxx_user_literals + Feature record: CXX_FEATURE:0cxx_variable_templates + Feature record: CXX_FEATURE:0cxx_variadic_macros + 
Feature record: CXX_FEATURE:0cxx_variadic_templates diff --git a/thirdparty/bmt/build/CMakeFiles/Makefile.cmake b/thirdparty/bmt/build/CMakeFiles/Makefile.cmake new file mode 100644 index 0000000..dc1d5a8 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/Makefile.cmake @@ -0,0 +1,95 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# The generator used is: +set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") + +# The top level Makefile was generated from the following files: +set(CMAKE_MAKEFILE_DEPENDS + "CMakeCache.txt" + "../CMakeLists.txt" + "CMakeFiles/3.5.2/CMakeCXXCompiler.cmake" + "CMakeFiles/3.5.2/CMakeSystem.cmake" + "CMakeFiles/feature_tests.cxx" + "../src/CMakeLists.txt" + "/usr/share/cmake/Modules/CMakeCXXCompiler.cmake.in" + "/usr/share/cmake/Modules/CMakeCXXCompilerABI.cpp" + "/usr/share/cmake/Modules/CMakeCXXInformation.cmake" + "/usr/share/cmake/Modules/CMakeCommonLanguageInclude.cmake" + "/usr/share/cmake/Modules/CMakeCompilerIdDetection.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompileFeatures.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompiler.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerABI.cmake" + "/usr/share/cmake/Modules/CMakeDetermineCompilerId.cmake" + "/usr/share/cmake/Modules/CMakeDetermineSystem.cmake" + "/usr/share/cmake/Modules/CMakeFindBinUtils.cmake" + "/usr/share/cmake/Modules/CMakeGenericSystem.cmake" + "/usr/share/cmake/Modules/CMakeLanguageInformation.cmake" + "/usr/share/cmake/Modules/CMakeParseArguments.cmake" + "/usr/share/cmake/Modules/CMakeParseImplicitLinkInfo.cmake" + "/usr/share/cmake/Modules/CMakeSystem.cmake.in" + "/usr/share/cmake/Modules/CMakeSystemSpecificInformation.cmake" + "/usr/share/cmake/Modules/CMakeSystemSpecificInitialize.cmake" + "/usr/share/cmake/Modules/CMakeTestCXXCompiler.cmake" + "/usr/share/cmake/Modules/CMakeTestCompilerCommon.cmake" + 
"/usr/share/cmake/Modules/CMakeUnixFindMake.cmake" + "/usr/share/cmake/Modules/Compiler/ADSP-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/ARMCC-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/AppleClang-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Borland-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Clang-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Clang-DetermineCompilerInternal.cmake" + "/usr/share/cmake/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Cray-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Embarcadero-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Fujitsu-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/GHS-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/GNU-CXX-FeatureTests.cmake" + "/usr/share/cmake/Modules/Compiler/GNU-CXX.cmake" + "/usr/share/cmake/Modules/Compiler/GNU-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/GNU.cmake" + "/usr/share/cmake/Modules/Compiler/HP-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/IAR-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake" + "/usr/share/cmake/Modules/Compiler/Intel-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/MIPSpro-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/MSVC-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/PGI-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/PathScale-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/SCO-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/TI-DetermineCompiler.cmake" + 
"/usr/share/cmake/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/Watcom-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/XL-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake" + "/usr/share/cmake/Modules/Internal/FeatureTesting.cmake" + "/usr/share/cmake/Modules/Platform/Linux-CXX.cmake" + "/usr/share/cmake/Modules/Platform/Linux-GNU-CXX.cmake" + "/usr/share/cmake/Modules/Platform/Linux-GNU.cmake" + "/usr/share/cmake/Modules/Platform/Linux.cmake" + "/usr/share/cmake/Modules/Platform/UnixPaths.cmake" + ) + +# The corresponding makefile is: +set(CMAKE_MAKEFILE_OUTPUTS + "Makefile" + "CMakeFiles/cmake.check_cache" + ) + +# Byproducts of CMake generate step: +set(CMAKE_MAKEFILE_PRODUCTS + "CMakeFiles/3.5.2/CMakeSystem.cmake" + "CMakeFiles/3.5.2/CMakeCXXCompiler.cmake" + "CMakeFiles/3.5.2/CMakeCXXCompiler.cmake" + "CMakeFiles/CMakeDirectoryInformation.cmake" + "src/CMakeFiles/CMakeDirectoryInformation.cmake" + ) + +# Dependency information for all targets: +set(CMAKE_DEPEND_INFO_FILES + "src/CMakeFiles/example.dir/DependInfo.cmake" + ) diff --git a/thirdparty/bmt/build/CMakeFiles/Makefile2 b/thirdparty/bmt/build/CMakeFiles/Makefile2 new file mode 100644 index 0000000..5fb0193 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/Makefile2 @@ -0,0 +1,126 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# The main recursive all target +all: + +.PHONY : all + +# The main recursive preinstall target +preinstall: + +.PHONY : preinstall + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. 
+SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/bemdeppi/ham/thirdparty/bmt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/bemdeppi/ham/thirdparty/bmt/build + +#============================================================================= +# Directory level rules for directory src + +# Convenience name for "all" pass in the directory. +src/all: src/CMakeFiles/example.dir/all + +.PHONY : src/all + +# Convenience name for "clean" pass in the directory. +src/clean: src/CMakeFiles/example.dir/clean + +.PHONY : src/clean + +# Convenience name for "preinstall" pass in the directory. +src/preinstall: + +.PHONY : src/preinstall + +#============================================================================= +# Target rules for target src/CMakeFiles/example.dir + +# All Build rule for target. +src/CMakeFiles/example.dir/all: + $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/depend + $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/build + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles --progress-num=1,2 "Built target example" +.PHONY : src/CMakeFiles/example.dir/all + +# Include target in all. +all: src/CMakeFiles/example.dir/all + +.PHONY : all + +# Build rule for subdir invocation for target. 
+src/CMakeFiles/example.dir/rule: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles 2 + $(MAKE) -f CMakeFiles/Makefile2 src/CMakeFiles/example.dir/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles 0 +.PHONY : src/CMakeFiles/example.dir/rule + +# Convenience name for target. +example: src/CMakeFiles/example.dir/rule + +.PHONY : example + +# clean rule for target. +src/CMakeFiles/example.dir/clean: + $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/clean +.PHONY : src/CMakeFiles/example.dir/clean + +# clean rule for target. +clean: src/CMakeFiles/example.dir/clean + +.PHONY : clean + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. 
+cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/thirdparty/bmt/build/CMakeFiles/TargetDirectories.txt b/thirdparty/bmt/build/CMakeFiles/TargetDirectories.txt new file mode 100644 index 0000000..ba137ee --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/TargetDirectories.txt @@ -0,0 +1,5 @@ +/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/edit_cache.dir +/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/rebuild_cache.dir +/home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/edit_cache.dir +/home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/rebuild_cache.dir +/home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/example.dir diff --git a/thirdparty/bmt/build/CMakeFiles/cmake.check_cache b/thirdparty/bmt/build/CMakeFiles/cmake.check_cache new file mode 100644 index 0000000..3dccd73 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/cmake.check_cache @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff --git a/thirdparty/bmt/build/CMakeFiles/feature_tests.bin b/thirdparty/bmt/build/CMakeFiles/feature_tests.bin new file mode 100755 index 0000000000000000000000000000000000000000..a9390051b36d43414e9fd29c5560d4b5d350d9db GIT binary patch literal 16600 zcmeHOeQ+GbmG9Zn$J(-2AGV2XCwL)tOl-80jWNPtdnH+BMPzJj36p>sk9J493%j#o zcGi}F1Sbv@I*yz#MFm$95?5Va-A8d%T-`@}sZ=nSg78Nw&Ic5C$yNCv9^XQowd!S?vJaeRWt9S-+TS~bx+Uq_S5}?!`nleCb)#e7C}^7b6m2#yAYk% zNK)~7#XJ!gtHnyu1WqHaxU3=QVGPeH(+tB(P6uqj)f@5{y`i{77=F@Y3x@0=L27Td z#1-nnXWKoJp%78YAU*Q+)Tbl~xL+a+_bWd#3bG!Nq<2Q?ol$xW+mt**?jQAyt`Su~ z5{iQ&I!-`rllNIjvQ&1x=B5Zg+9pZBxI`Fk@YsSO>)i}J^7CSgGB0+k_Ugr@NBPf? 
z$Duoy9ox8}JD2LpW$n^r*JSU;u8kXF#X@Yo>^Ipb*&A*el`heEb0MD><02l_ui1O= zqN#V*zO;V4Y5kIC?)_Cy^6&4VVeG&~g=1Ph?w&r$wc?84T8N8mH{+sMu&+R6nXe%E zM%|i+t3~nrV1b{ZX!4nClRmX`r0q~xT()>#xS?U;Lk%KMb!3;?xO3jXOW%L>>-P-| zzWBQZn;vUfKlaNvpZMIXC;sKfuYTv{yKnvEo%et9zJGe;oyda4uYBjXKYxq5P`8l1 zXh7|XI{b&h=lE5sT~$YZXC3|zz;6@FL^Qe|*lc+XXq#9rdVTyiB!8Lc@$r8Mel)<& zGvIe%1bMw#1DeX;;A+R;v2dJ7Ij#g>z(w*=#kYuw^3S-)AAh??!6*3-o8!Plc|G|# z@D8y+9B+yfyIdXzM)F6LeKq~XUr2tNcvSJr3V05RZYsHUEzSLn!Z6bLf^8IC({T+$ z7&jg;c3bIe(Y2g`oLMYdMPUq$>@ZT6qu3)m266@48ZpOmmRA>8gLd*>6Ts5Rq){Yl zodA{OrU_R~=gq7QXCxoH=uOtrt`t4mKL(iX}nUHw?X_|0}=bxu}bmn`|dvCmT z#Wo?1Ujqk66J0d#5{J)464S384DCC4oa~Wf@9levs%B2Nt~f-z<3zFi)8}-^C#IiC zJpIPjgmxj$;R{-KS)fM-hO!SgT&F+e>`*o z-E95giG4732wsT3y?xU&eYXrvpY0nRntriwWcZO)N0&s=g-5yw?iv2`fl&r*LO#s zaq`G`jOE`wiLTSws-e{6N-IqT-sTV7daJR0uy15^_u!_U~a!?8uNZV$;io$q+N!N9%+A)VL)xX8=8bcw`}5E0?m8x!~5h z#xUVweMQ7PSwKWB41;62CC8S%32e$6r16rSbm2{TBn;C@*O#|a25X#GC|@{WU0?#F ztQ^gktevKy6kVs3bPGWK` z3rRdI^ekLxV&Wy9Hsme%$*~L8WYU^&YeQ?(tivQo8hI-}W;xQQ0I^aoH{k|E9#hN}_B&S2oP>ycjDlC7ip2mk zHHAkFb3eelY&zo_@IGf5V~&};%W?yzs&l~1m1u!cUIvI1F&_)#DzLTfsJPRInG-NF z2oQ}bJ+sxe*U_7X-x;$AJ@$F55kaemn{g~Ng&BegRqKfBm|4u_^aWPSlATQ!Qr3Bf z3=$ac{OltrSC`rbOE%^^#y?3b8k7Rx1NcmI&ePTb)5+3jYdHYbL6WayDU+hPZ6=+< z`2;we#rb&CYs>ZPXph^7D{*#a<`Cc{;4VPxzh-9s67V6w9|4{NY}0_C(7uw2cF%61 zO}1&v7c_^DXw7ZJr{5Byu(5^&R0)TR?cpUX|uhJiB`;*XMr1NX~U?loT!(il! 
zKW_M0yW5?})^6k>0*YPhaGU{z$Yx(h2$gNLcpk3D`LeJG>_WIalT)Fc*Qj z2+T!bE&_8An2W$%1m+?z7lA+E2%J*q98W69p-|f+{0i?ChFKq7^wD2dG@pm@c?q4h zQQ`BDr5;&6S6!}X+5=Hp<^l1tIzN?rCGfbuLDe4(#U=LsM>7RNdUTJbeIS*1V_ZTy zC!lgf)zhAl%7-2(pX+rhdWnJzbVl%VJQdzku{`e!>D+y{D&0H{a}4M z1Ej(W4ACD)`zlLhHGUfSm8^Y|_ z=W|1+EK!Yk?fo6P;K~13#ewJFQJjxZVOkOux4VI>{>!1|{%=yxV=UHPoGRj{|6(_e z%ncl&%cH{B9?_jCY!j4&Byw<1mj7&7*yLqM$e8 z1W-G44;pQdKM{nrgTS@>(DpEp+sPX(Ob5ZCP;-<(Lo>M^3d;!`(wb8I&Sv&>*WZ% zX^^djwI2Z2CUK))`p5L&!Bd^kcT*RBu~l+pmm9oJHfG4e0IOOMprU>IRot7BvU0m* zfxJVKR71@*eZEW{1=;)!B5yehREFh6AoH?9+q(hCuW`-Kt8lewqoMmni#8Ja67DyL zzKF^NdsO8vSP=`uugm*exU0ZN$V}@K)P~_dl-9o8wgMG&wL^CmV84}BA|5~m22A2BC_9p z8T#6>L|ZLgLflAs@vJLbmtK$8?~+P0#6WQU;kgyukm#X0?Y|=nahIuI@KviBaUpnR zLNz)W;-yUAqUmk1{!ee&u~LsjO9v)z3*mHM`?R$EsUX{ePGDn6G9&im3W;vqFUAG6 zYA6qQ@z=L%!==sLp+?w%WBd-#eQMpSV!kRJ3FlvuWE(MQKSd z6UmI}h+?MTxYYt4i$r}nA|LauG~j&>M7#2C$+kADlRl89lgw=PlEk82>1fx^_0cZG zb934*U3+b`YaFomTEv>p^FW%#DLa|L;Y*=Z>?+v@vv#V>rK5%-D)W|;wz|w*u1g*? znYQb3i$xRsoMoro%;p{lI)##rY?m~ecQc^Fd=X(eSmJsnv|J}*gD*t(f*?JwKf$7rRkn`>-cr4)J(?zybp^i`Z5LAD9FyTV~Sk{et+1HpEbp$4Eyo8`fMpHUIe0Jtb0T88c7{5 zgpokLF5EzVgE$(~vB~?yYJPKhKUvKWi=cg&j?7Npzg5f67rYOw=C_ph zi`D$vyhEzjk->Ue#kpD@-$TvuN|wa+ioD6nN*!5tJa4M`v-y_gxN9*wUX{@f+#pTruMd^aRsjAD5KsJbXW~#ZgiU#u+ zfUk+#_|rNO!u*N*<`wPrh(F=u#}xlbAAg&)Kbv6dc zJ-@H`{PEB45!yaSxsw&}U>*LLv{Sn-JuLYZ1{C_&b?iI`eo$Wci*@AR0zW8!{R8lW z^2it9$D5!$@@u4>+I;s;DPKE|E5HxRJ8zQwfb}0couXSBAICPyFm4>!Z43|X89@g5 zY@)c~=8Yu2O(})kNEM89t}uq&v=rqTA{}#5aK>dSw(+W~t_c>BiQwh$PuH0eQ7ZEYMGQKhjgE!TZ|zEN*G@l?j85I#me21VQk;sw`0&4ylER{!OIRY zj!#@;Frfqz+ja}%hT)z4eZ$7i?c4VZju<0-{lkN#gv|J|fmE?zWK26nIr7zxQ6786 zN44Cv3S-;nZ|d7IG=RG5@9T@n&-YZMA!IFO?RdO0>HYla`_&2dl<6Os&Hph{?`77# zOy?J|;#3~HAVAmgV1_YlS1e~j#O#7=#rpb(uvgFFv66l z>P zF~>T<6zwY+N6Z*SHXftOQteXozv9q)6iWu)bzb4`UjBbK{}%|!Ru+2m(<_bj`TLmv zpDbD_47|ZduLE7F&)?CmGPu$w@7GVDz7ZF<&)?fe72{E*-=v`TYx%4n+rUSvT7mU@ z^thDl(LDwFWtxg#-{T5#6A=2}EY|1m_ajRGNfj{aBca@WCvYmXzadf9=X0YtbqeJ} 
z50vlz_}_sV?1jqy^L}89a+u@C{&PDF?}9#^`7q7@9+OtVTi`GUl}~s6qnes0Dbxv^y_~OJc-PdQNTmSRP`sFwKxoa4P*T?^~muyaFD9JL@q# z2VSK<|NmuFDYBnzfc2RE2ng}G|GdxWiFy)#d1|Y&=wJ}9r;1KUR5Kzws&fNYSVC z{qg(7YES=Rez27jtj};*=~wzlxS=*+6ralCBgNi2`iItfTBjQsarM wJw?mcppTSu$Ja%vxa&$!zgInYp8&0~+p{aR2}S literal 0 HcmV?d00001 diff --git a/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx b/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx new file mode 100644 index 0000000..b93418c --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/feature_tests.cxx @@ -0,0 +1,405 @@ + + const char features[] = {"\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_aggregate_default_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_alias_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_alignas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_alignof\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_attributes\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_attribute_deprecated\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_auto_type\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_binary_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_constexpr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + 
__GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_contextual_conversions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_decltype\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_decltype_auto\n" +"CXX_FEATURE:" +#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_decltype_incomplete_return_types\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_default_function_template_args\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_defaulted_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_defaulted_move_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_delegating_constructors\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_deleted_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_digit_separators\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif 
+"cxx_enum_forward_declarations\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_explicit_conversions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_extended_friend_declarations\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_extern_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_final\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_func_identifier\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_generalized_initializers\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif +"cxx_generic_lambdas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_inheriting_constructors\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_inline_namespaces\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_lambdas\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif 
+"cxx_lambda_init_captures\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_local_type_template_args\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_long_long_type\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_noexcept\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_nonstatic_member_init\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_nullptr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_override\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_range_for\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_raw_string_literals\n" +"CXX_FEATURE:" +#if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_reference_qualified_functions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_relaxed_constexpr\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L +"1" +#else +"0" +#endif 
+"cxx_return_type_deduction\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_right_angle_brackets\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_rvalue_references\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_sizeof_member\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_static_assert\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_strong_enums\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && __cplusplus +"1" +#else +"0" +#endif +"cxx_template_template_parameters\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_thread_local\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_trailing_return_types\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_unicode_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif 
+"cxx_uniform_initialization\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_unrestricted_unions\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L +"1" +#else +"0" +#endif +"cxx_user_literals\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L +"1" +#else +"0" +#endif +"cxx_variable_templates\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_variadic_macros\n" +"CXX_FEATURE:" +#if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) +"1" +#else +"0" +#endif +"cxx_variadic_templates\n" + +}; + +int main(int argc, char** argv) { (void)argv; return features[argc]; } diff --git a/thirdparty/bmt/build/CMakeFiles/progress.marks b/thirdparty/bmt/build/CMakeFiles/progress.marks new file mode 100644 index 0000000..0cfbf08 --- /dev/null +++ b/thirdparty/bmt/build/CMakeFiles/progress.marks @@ -0,0 +1 @@ +2 diff --git a/thirdparty/bmt/build/Makefile b/thirdparty/bmt/build/Makefile new file mode 100644 index 0000000..c178b32 --- /dev/null +++ b/thirdparty/bmt/build/Makefile @@ -0,0 +1,148 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. 
+.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/bemdeppi/ham/thirdparty/bmt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/bemdeppi/ham/thirdparty/bmt/build + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." + /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 
+ /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles/progress.marks + $(MAKE) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named example + +# Build rule for target. +example: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 example +.PHONY : example + +# fast build rule for target. +example/fast: + $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/build +.PHONY : example/fast + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... rebuild_cache" + @echo "... example" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. 
+ +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/thirdparty/bmt/build/cmake_install.cmake b/thirdparty/bmt/build/cmake_install.cmake new file mode 100644 index 0000000..5bccbed --- /dev/null +++ b/thirdparty/bmt/build/cmake_install.cmake @@ -0,0 +1,50 @@ +# Install script for directory: /home/bemdeppi/ham/thirdparty/bmt + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "0") +endif() + +if(NOT CMAKE_INSTALL_LOCAL_ONLY) + # Include the install script for each subdirectory. 
+ include("/home/bemdeppi/ham/thirdparty/bmt/build/src/cmake_install.cmake") + +endif() + +if(CMAKE_INSTALL_COMPONENT) + set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +else() + set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +file(WRITE "/home/bemdeppi/ham/thirdparty/bmt/build/${CMAKE_INSTALL_MANIFEST}" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff --git a/thirdparty/bmt/build/example b/thirdparty/bmt/build/example new file mode 100755 index 0000000000000000000000000000000000000000..657597963bcf7f09e0c490fb2b06178be6842749 GIT binary patch literal 72600 zcmeIb3w%`7wLg9)2@sG+c&PXu6%ZeUOcFw%iUY~O1Oh}7C@OH6%uL8gUd~KHprFV@ zi2Hq$I zpN|K2*4bgK)(W+Onp+#M zjn(=iRX+rk&dqlU9^q^XW+E+J=?EDJg=rKc=q6ym(?v|v6jZenr268Qn{OpN{l_w@ zpr$zmg7Q&Y_dO`0fXl^=g4NU@T-&Kp;&O_=)I7=;<9spBr(hPhUqRJBDuzM@(<{>4 zh*8{4Kr0r1<4{#w_FgnV_eZwK1V^^G#f^doxSa~Be3u~~wex7NEKaz9>r2L$`1r^S zY8)okgenX2C)NZe)P(99n<|2$i7>huCUW0Hw0`>7b2;&j_2!#m45q^abMEDWH zYy`SCNFe^!;P1a93`1CsK-UiuZbz6!5zBQ3$4>)XiEszUPXt_#Fd1PcLI{EM?Jk58 z1RbFf;cf&Ug85p4$UO)bGqN1;e!kCTxDGIkuoWN2Y3_0H3*9tw-Im* zLKXszQ}ZcVY0o~KmQk9nZMn~x-QSrO7@p?1?!>Cz!Ir|KS_U-OrZxAMhj3 z$!^!PXJ(zfB_s2Z?U`+phM#m=+IbCw9&+4q-`TF61ExReTYolWGILO7$Mm$bH=M1x zPt^v_96u^Fa0-b>=P9Ey-Mh~j1tw*ku{=|AzdE>ZG__?c0`-e78i(=XPSc1N#9NXj z;sePs@twwU5<&rjx(adg9ThP_WoHPbWQXLIE;qt-1d@Sb1hQRp%|;-7B>5#?Uxq+> zV3vI)vt(z8w`3P+e3l?AMOcnNmrnxm*N?ve1hNTA{#N3?7NG$_=~5K;>T1MI6T&Kl zW=5`IXqJ0Ik&Re~Kyr5-0$rOB$d2BCfU(h95$IC#NU}%rc?-g=2xM#Nx(i_^0?FY0 z2&8Mf5PpX63j}pNh#MN*M-U!E_!R=_?H&a4W!6WspL9JXf!4){-j^@z2UQ&7cbw?@%8$#d28PM-Mc^j&sz&t4}aj* z^C#aw_U`c)&f4+knLqpey=OlCQ2Qr$PHn4R_q~znFP-uJ_jXJz%guSPGB$5>=hC+E zf4JzY8Q0$W<|BW7q4oD0|MBJg->B_Wt7)GfD>BH~Omap4Uo$c>QI*A>r@dx9*me?|Rq1a?g~_550HUJx@;Ca@HFm zr?3C|6?;FaDj9snWiR}0<+>{mymoge&QYx(v`gLf3Hj=Xe!;pCZL?6@Md<>rf= zlULt9`QT&wuU>rms&|Iy`4<-N_|-fAar-kL{o(8OLicTtjy&Oazd7ZUajvX;KKkV4 
z!@vLJl~Q+$_mHRS@`u+)$BdtI-ufL=fBtgiZI`ck_19kmBZ=G}NgLB?(t9<01b&nqU`72Mpd&}Ap zJFEX=jPfi^tU^wPXF<~?_4#o==Do(e)EwXzk4(~?vo2{IW%g+iEo{AL%+{jr*yIS*@6ok z((kYm+tcmwSxD%#CPtj*1Uoz}kI1jFUT?xdu*1(zf*+S*pMFe|^uv>+C)?_@HU{yL zN%}h{37+hxT{&Gz+VjUGBbM^s$@ak|7W z4@$i9Eq1_3qH@x2koci8x%N{|A6qH$Xhb7;*i4q{QEMolLLl-Gl^` zeh#NsGWHpqG{SGaJ~90t&Xo9>D7`&Mk1NUm4>hKFsCJ;2c|-pPM$z^r~Mg&yndrSS#^9PXFsM62FD{>16zm(J-p_ z^Hnl^5#xt*zuvX*X9zNmK>GE}{~)J7m?_Jt;`Em=ek=3yajy3&#(#2=O#jG6x4@9k zMptm4#AkDRFs$M_!1(6XZV^}fDI6x#_uxMqPU=@R^HYuQ6A(PYzgH*Axk8ktX>Xk; z@lSL6$1;8n^JfE>L-vub8&8tyE13T({T)1xpI5s@I`aSMD$17WFRgcr_+;@HzCX8f zDa)H0$L1WFzJbd@c5&T(ip2kRja$T>;;*K?!}9+Q%gHRpPwy|&4`F$@mhsOpKl5wd z0z*C|U0X-X@m2eQ7f7_`KUbDhe63qx3K@R|%T)*W>t@CeV}8EO^(y}VlI5yD*LxbL z_psdVWBg5w|B(Be^F6n~sB+GQ0MIx(SWX~L;vzqpbk@1VE#m6W@41~1Z;*J^{#(&s zD(3|rM-(friZdkMntnbQLg_!gUY4WmSQjQ>!hgi_IhV^hIalI4S$@uE{1q<9O?~jfp~%KbLJRX&eUd!pT_tD<0Ssa)r=Rp zu#d;%`xeWaYR`91km=9m{(3n5Gc3=;Z;<5#8DGhEZ#K(w8RLrzWI5Y6$n?lAuBmKS zXY+We{{D@}WokgCS8{kK_(1$z%ltgZ{rVA)?^`@yzs>l+K=3I2!40yU7e%dDUt=H$ zZ?$_j50-cbw+GdUtBdE;!+y7jD>?j4f0_QGYb1W0pkfb$@y}WK`D3TDywnA)ooPmtP47M1O~Ahq=FL+|JjXvK*`3tKt6M&hn||mtibFR=@5AmXmQ|$%hCJ zaDUdjeN_^#$~ibl)@#+%&)A;a#qyxikIa(kmu-;cxHNgCpD6KGJ9#0?q1Eqti0$Wn zJdRKkac#jo=mg)K7W+Ae_3>(!t5KZ3k;mQYPkx`mSmnH3*`FG>C|T*z zQ+U-~)PN##7jdZ*-JG!%sJJ2uO5l>Fbxa&kW75fxVzGE(|Mtk=rkmT~!Ka(_?f z^m*tn`MnP=a|_I29$z}}p>nKxI*R+ds7|K8nalY#29DCd%I%rS_z~RBO)MwHjQ2XE zKXZ7AEQirrF_*uY+o|S-N0ESRX4%rj^e3`?u=;^ZxE$;J_3M7JomT((#Iq#6jq6o> zz8-uay|c>CO!gbCe(VS?-|A03j*bw24z7{ySN!=hTjHlNUfG|1?1w+mB-77A-|1SJ zE7Q+r{hcGxnuc@CNHpNT@IpGfEYNg)MQweZ9*O$GQC-*c*-iR_;EGTr8VncJ_#%;D zMAJ(u=IVi9n9&t;i)!lYf)&2XnxLfFQ}l*tbvWn?=qrP(bwBQ+NPfk%InK@54EwmS#Jo2BlUH@noxAL-jpMAy9$vt zlY1qEb(h%L7uLUVjTuJKusAM zqx+kEdL{Z;r~7?Te>Di@(cBzeM=$DE={3Q+713%v7!KEm%jWn6H9uGvsIjZgvw-T6 zO{?ctRp=Etx~IGVJQh_UuTjyw@@Rn{)zsDJ>5Fn*x!}Gp0H)Q{`&Z`G)vprFDv#En zE6MtnyvE7;f;r`tCFS{g`6RuJ#M z)u3Ir!)N4<9^1gEVV{Glj%h^mz+BPbx>dEz*Kkk_81)FvpA@WZh_2>7DWQrBT{(tC 
z^boA=$yvK$^jBaIC+SiIz(#I*c{JZ&-xw8qg!DzLLlF`j45Q1X>yesZutBe?4}0Px zT^MK#`=X)xx{{jm=p=DlQd;BkP{1{>+@*t(t1D!Vx`-<3q3#5XyIRuhTB+2LtHV)> zuJA)O2<6iBg>|b!5V-}xNMmh~h<;@NNbsSY&7r7P8?3FZZwi8ylz^>5tuItZ)nIg3 zN_%O$YGn>uISDMOi%RC_*VWhh@+xbilcJ$o*cO(vIa8qAb&b%7W>;QaaFrA;U$j14 zV#lh0l~v~IJf6Z@U$Y(wtqJmg#|Jt;5{y>GwSQ)9w5rCpB7*VfAuAA4O%*Gv0mbM; z6FgJ=^>vWSMt_u4ZZ>o%Uk5LWEBrOCT)kX3$W@Sth8#W8;IEks(OBgR2TI69R5d_H zJ&-*}S%qHQFu#1dYGkgyDCnmqlELgl>(C|98iV^bBWlND%UBw@YC|RV$!kb@6Qr3; zay^8rPLHl`2$rDndR&U6ZG_BE!d&ChD{_eeo_tD4MyQZ1*bKo0lEEgygX8Faq2Jfw z^J5+%F_Dt)k2LC4p}LTi1{QO&{z^+>t0Pf8RNGLK*z_LCn;?;_zY-@^J5XAg<0-9# zk{4hiK%rn^f|gU)F-i;O2ou1pLAzj|a&^5ngqc~6KM;AOtvaEE%9Qm*V37wlYHqw- zSU6XnDpL%Xr+i9W1dD?Z@A}ocud%sA^aCm!khqdQHK3_N#}FrQJ}w&e@`ZT{#?dTe zP0$!{EC&K4CR%ACiDju#nRr1Y1FS&h&_x;r;ZYbi<=DFt5UsC}5^h*ZR}Q?gC|t3S zKayYPt7WS;$2LJ>lP?_d)%j(j@~B}_h$}tqwjEnpN=g3aSLh2!E(*dyAz|TQ75qPx zUQ#|;XT@a0nJNY=A0QP9UYx5Yfa5n7t||Il9bQC@&mV-|`Bw_TH|%;74740LgUhgr zux>p)IoLOo&HDz!Q2-MtG}CM~XJdXK56&=+DAAQe6RO@&AF7K&_Mu5q6N>bb@?sr| z%ie;L0aspvNlUb5c49&DV0J_(j+RXc*3k+;4^@>cj)bWpdAbg-N0pAYzyoOV)iee@ z)F@@mrB&m8uneVa@|~;j0wJB9h1j7gp5V-Hm0n2lJ-r1{V{a})bu zOKEEH#M?7T8Wck?<5snY@L^2N-kOo+n589dL;~e`kbq;A6vMoz`7c3Ij%Q*fxwX!; zn2h0Z6W8LC@g%g`ZvLHTYj-gd&@!nO%MYxol#SZYuAiXoqHOb_QCXjdX zU8PXM2X&u!Sa0DWu({$OZi$lF4(j%P^?eswuUMc)5~$MeNAq*l#QcV zzNk;`FH#XHRhRf8fLytzvfkF!iB2!+f{l+L+Ik|u0osvHMeu1U9OhRz#(De)Xa z&7MS;v{z5wZav_!*_X0WsXnj!?6#A8H8hE1l?t~MlRwL_{icemFdVGG77X^-UmNk5*zwtR)k%ly`R*dAi(^a zjtLS@5p47iQ(7UpX?*AqpVVd-H=uE7DR06g&TU2FWTBW1cNe)#Md&nx=c*02*AK%~!#}7Yf>7Qu8{x_DGVpAgd7DjJN%#^wicB{f3bL2NK zPx>|}v?@xSLgH+vTvyu&C8U$Kn{v`KMmjn^jT;fedej=8F(1U)s@;^tX_7i(J`0~> z9E96$U%*y*n(WUj5)XB7q;9NB#j2D%G;CrOgne~^`dZ!R_Xi^p<6KOS`c`;^k)*XO z){JVKuJ>tBX&f~RPGlJ!MB*5d&%$i`A75RiZUD{m57b#ql4wyX4wR37_MRLLMuOp{ zpma9v4Q;N=@S-qHRZW>>a-`@o8i>^_#6x8x{iUcC}R zDxSPz%X{3bd;7yGqZ9GNs+3yGLSr+owrq~t);aFJU@FQ#(A9F&25KFeWPugtY`2g&MM*cvy; zHrm){=B88?OcQ{y_j0_OICwb2&;wy-F{|RZ1`eBg5~L5Cvhr|QpBFF0?y(O!h;bV& 
z)n+ml#SPdFla4Fa@u^sg=ZiB`VLjqI0||%Q*j_)rLv@dr{Mt1Z6w zzkD;aMjVR5V5#Fx<6#xnxLOz}C=e#1T)8&u+X;ZN)yK9Ck1g3LnbN3~E-FmB`hDdm zDLU>i^IaQ_C)d!pX!GL4C_!4uFP%u@JW%Jxl!b7wwVbX)g<5wu-8PGg>EZ4U(D38fQrYS`|MBAsC zrH5A~wr<3r38P@EP6fOeP^&PhQ#JIEvE?NL$sw9ug`4g1RFX?#dT4~VTnG#Oz>Fpy z%a)^>GUKPzKc!B%dB+4#l#7jMI23deXx_0unq^;aY@^2_Q~ijW?VuX#)??kbqQOcj zXO($NX`!y1gvWU%;c=c>r6n_p^xW*+>`B_Jq9Q#nJ4fOcFUEa-wrfgav`{NrymF!_i?_XX1 zzoyl)RBC&A628zVP^vf6=^aT8@7dxzo2Q2GW^JbS0N(qji|7YU^s;fQPtfo+(uYm- z0eI&$F@J_Be+EjBbSf`&D%5pneHMjKa|36EE>jcc(9zsSI9+HwRo1{Z872LeO`5KH{nMrB;XDcUcIADD^;>?K9!kHxP){VZy6-y$QeHgjere6TZ`g$0u2gD`vvuQw+v+ z*o6O;fyMWgO?bS*9KZN|cakY*9IIuT@MoLwSth)CXPn5RO?bTWY+T-rA7px4`>m^R zYTnIl(SETDp#|QJdo!PxNxCP6LFyhG#ou9Pxp7DFY9e-ZcH0Nh=_-u-kigj%l@fj2+)#_R=;!`P3-S27;@mz|NDs?Rv z@e3$UD%4db;^$DDRHw@=;-^v^wo2>D5%CceCspZkiufRklZtd@iFg{t>2>ukO~n6E zjyS1G*Ws^$82d5BNkzIkMf@PeNj17UMEo6!kDz#)h`&Mckrdx8;xAL2RG{l_5&td4 zPo(&E5r2;2CsBO8h(AGbQh}}p5r3HCB>G*;Mf~3=P9on`CgMM(IEi$ZTg308IEivs zj)>nvaT4J!r-=K z`^O`V$ovE)^lp9`nY}G9$o%hc{`Hr1l{+!L4PS_yv1`zwr7!e*9Vo{q&y#$6H(s)M zJ`zOFG@3AK;WM2NqIE6rb$2Z7`W~_jJ4@?|08$y{-s?Wt1@Uirss_FCwme3o;%)cr z4^Si8Jsl+4-ff#n_?{*;@wT+b8X+%?qZZPn75gc;`80h-ff781h>@T@Hcm$8qYR>+ zOjT^VT|~ESCNY1yd5g?=5v6RM+u?0pxZk_&F`}Uyk{rr-0D6R|z#{sGF9lT%FdRp73Cnhfuil6PSIK&*|I1%2-DHp_RTej zkyfDHbPM`&K^N`Lfqog7_E-e!fOMh&RlP_zSoW{e(!b7C+mX9L6nnemtw>5tS1yoiC!E^{89+S(I{?CeY%Cma1A2f7Loqr zVOd;jd+go6Bh5}uvxqV%+5R(SP;73Gy$;(QuUECY4b5t!X05kv^=^=N6ZxhDvTC)G zpbl`vX!Ue+tM{ULHm#mOt-gm_eK$o^tItL5CrQ>!O@Bt{g`r)$5pCJu^%5m&c^>I< zgv7PSR*1^>Q+t0zofkwm6K8{X2zHDzC{27Io?)etp^P(}V#=U1u90vT%R;31wZ}d> z0^BHpBVlTfbrOPWQXKjcGqJ)cD-i?E+aj9#AvNT>y=367T>3oOJ2(tkE$hB)X*{xV z-ItEW^R8QmQejZfeCD!;VZ}~I?1}NPXHQe~8T!m!tPr>tMdqg|=`&;nW4qDvrO$5{ zMYgOv;%#X>>}~ZRWC0+W#nlmw5Utv=rnVL>3zThdUHBymJ-eePZeSeO>iO~uj#00@ zXi7`tr_`EOt##q2Ew6*8PtgFhE0zsBsRB)>XsbY_<&s zji+g8CX`KNRQ)JNM@CaKysb-Pv0J_%$xVlEI9iEIqBN+(f<6XpEph17ZWdABV@ApI8xaX(EUt%z>i>s3{6{2&Jv z?MFo&Ac@jL)Oaeh)dPHc>~a}DjMAu{!(3W9(9DMfxzlJEy)Ez`Sj?TW>z)`D)_^vA 
z>>YMN8%!0XaT}7xi~OZz8*1=$!UlRed-9pexDUxF`(d<@(jKO?hhF?)J8nWlCR%Mv9Qz%CgY84j4|7t(P%0ioT!eqd^Cc1^reD;cIg;FCdyOlD+#0C)-w*12lRg(EZ_wAIc*mCyPLj z+hboKtu$(;a(7U<(yE>O8Oll!rYtZ@+L?`QqKHRjbw^aI==*QRsG^-vik3#|F1pzs zn~FZr$P$0R7mV(hBe_thswW9=2p()v)o;%C>AH7m&tLv|*6cB1An%m*v@Gbf>p!y}eX>k)SQ6;;8G1 z)%{Soc2s*pbtILG(M^n>G7;`fa`aC@ztA^Cwv~}QAVFDX*WF~8C?ZFFH;?)fDI-Xa z>9~x%GbGRAganaLboyQ%m|ei_LQ~qn5J(jC83b?&T&7xSsnI+@==Y%$X+81BgB#)W z4SbGJ&wV^>F|=!0QqJxsH%vAXbuUF@4duBjvHl`bEKxtLrHF026ym=H;&RU}!p6W% z1QIUMjd}4JMsp1y2?Z#$)Bw8d(Z(Z=Mkf>;-D7h*!x9~TsF<4#0fEJBt)Un7sHb=N1xg($c%B>=x+9#&t)u#<6k3 zlPF!6kXHn|-}Q58$&S!)ZEkD47RV#sKZ18ch}ftI9@dDG+ha`-OyY5yN{Zn&8X!{6 z;FP5zpPY6uVH8Kd7cUqm*xjV6J=79zv~FB4$Vb~Hi{R?Y1co$IFtr>T)T?A^2E=I1^ezoh$8@PL?3jYTa9C9Bj*5bQXAQ!>uKiIG#PDLDTG zgq9FJIRB7H`3;;OO>O>xnM< zci>QW_xh(OofaL-6!->NPY>k9Vgw1E9?Fp_M9hh@7(dUPXgWnkGbP_lIgcV|G38V< zWe7#iWr}8|d~y(!^O*8JMO0({OpzR>ykw?4Ly@UWdDu+3pCWUavcpVirN|XbxyDSn znj#HM2_WKa-AoMjZo8fM$b!;}MsHWVfpVRO_@Oi8%2g(7j|~8hU9}pz<#MjuOT&h# zMp&2W-<`f_`oifIG@M@Av~GE7AGtU!kF^1qM)Qu3c{DqKm!o--Y#k-UAbLs3U+L=Z z-gwxtsV$m~`UGWLnHLSETr|?${(mB!!P;vOQK?bJ-^uc{Y~DjTxD3L|31{k;yU4yLee?WPEISDfg+aJ79lnS;g8gcmr#vDYJ?Fw{fmrlKldhcEwh3E+!ishi#>n+%0nn&;lQcx1C&^i^(e2AkMWz z<`Tf{fG#S;7?*i&ysYCzi*PY5r2_H*%a)5nu(twLOEsc+G9a8 zaX%9!x7%Z0Gx148N?Qc0sQ{H`j5*DnB8?bJa~jQ@CL+=Zkuj%v>pfE!=8H7g?&S<~ERPBVbgv_2;E3~U-NqMP>-)3@E;K|recSzsC28i{I;{Spb4tx>oi ziuy1Ox5qRjhv0~EuG>pC<;v+xrZ1hoY`U&IR%2TA9B$P<#&&=g4j+aTn^ShX27_RF zKsCG_tE9^}z6nmzI|B=w?y|lU7LcTdX7;mP8!2F(u>M<0pari#rdWc)K>e_u6hh;3Es5b}u*$^4>2 z2Z$&u0T(!t<~{#8Z%c<)oO3ab7=hwMrvIiIL?-NTiS;jB#8Ll&_7l3(>yUY{kd~={ zU5L`&Nsm*quzDMBS-lyuEXPCc%pUQ!iedn_o?7$Q~n+ zX%q8&){ytHS`_-}9d=P0d)v@cQhwo$eu{~LI$>$P;4Kb%&}M^@Q2u_JVB{`l){yH& zC2~b;H3Ij_4F}X`Ea~jDcSjNnpELei8SA^J@ycs8k5;%CKENjTc zc!EHz%aV-H7%`U|)d;yo_js*&k?@pB_My@OCkFguplKo@7POfe_hag(*tmsei?yIg z|Jr|(2&h$f7HWX}Npm?3md0Rh0_A-?rC~!MeHRu2{lqpu#kq3{E&(hYGlkF zUNj7n2}9ljd6GNk5K1stYTgFX0-g}ik~0PWpA`kSiXBblYLDG2OO73yOtJIc7P2MF 
zX`Kt<7R++OPRHJSiw5xd*em}<5LLIwnkl*5o>p8)N3NJtogF<=#A^&r{{`32MK0+u$@Lg>JvW9hgq+g6)_9RDv_1AK+TpRwCKUKHaP-hQD(xQ3 z3v$$AK%oI*)OHhAbcAfhT!nrS{fljSgK|6{Tl*$K!Kt%F8}_R6A-GCeX1lP9#*jq9 zI3B{Sn>Ixl?WVmE7zibk3Ar~Rx5akI?I-hO8tR#djnYyf z)gv_GRMC7$wtN6cGU*JId`NCx8trqU-zcfEQ&^vtxl}S3EA}~o7tB4+)S{lcgjDUZ ztUp`k*&}g_CbjJWagJxzwtv7#%C;zN!}th`1EHhQZI5*eH6_In(Hm)}kB&#*0Hy=p zm-J-^BL+EZk5%m#k|5G9qqOXvf;3-5#e~uU(}yk#`FctiYR&6Fp13tThZ58CJ0n!h zP1F!zB107>0E}>+pmK&VDaRWMA-IZC=@}%du01B75TVY1+#VB8G^mGuTEyXIfV9ix zh$_+SDMC30@2v8Bahw8~N%X&rSs=?S@uT87dXDfBYU;-%{NM2$mhA2qnAvUj1fVyx{4^5hdmgX<>jD z7Umq0A;IQh^i1rB$uk;}P#_9}!G|`TAD^>e9_h#|dKL)q=GAlrXG5?-)Hv}qNKyhB z=Ec$R;`3G6!_L}-=HPerzB|38d-|f1md~dzENOXddPV8Bu{q=!5#&K#e~Fgmbt{VuGvGMD9e^_c_W+gy?gtD49syhpI2w!YZGeS&D>;bF>+z+@N@Ce{;z|nXz`&qz3zzhGYyL&lc1K@hVt$=p}b^x{k zehJtKI1m2p_yL;M{NLT(Wq=O=HUJ&~+z$9P;BLUi&hG9G!UG-#eC{382NU=zU?JdP zz{P+cWACOJaOeTF7w`hWJ%ASj?g#7uJOa23dxoR&bm519g@CiL)wdY%8o*{i+85mc zxB_dnJ%HKR&F=*K>4)9jS>X3`fH{BzvByvbSOC}nxBzfF;7-8ZfWHOo06YLV8qZ07 z4Oj>`1G{_60oNby?%o1;6W}huzXR?AoQd7I7~pEaOspq=0XQD;Wk4^jF9GRE^+N0- zZUI~XxC`*7fcpS{4;TYH5!=d{Sj^uBI3Dl~Kri5@fb?YhB1|e<02=^z0p19>50G}- z4+F*kvvB4;0$cYvfcb!n0qHcL8E`$|4nR6jYXd9=>;&8dh@V2!>W-jZz+V7*0bc;5 zC*}_UZUO8MPj?sKD8PMy*?=*?D!@!Ql{)~(13m%h1^gJ0p0Ixsp2l{-f&aw#0A>U3 z2lN6S0bBq$8Z*vvz(T+^fQtct1K13B5O4>e>r3PZq*Ijrfb`_>5x}j0qetK!6u?5j z-vcfNd<(D{@SLxZAMj$pJ%G0Xb^`u8pa%V;jbbO@KLFi;C%}tY4tNUSdcZA!oq$IG z>Gx&nQt!Sv)-2E*%~_7q1`o*G?ii3oczP(mt)sj90!ql2eR@qB13IN&{(H2649w81 znL|g<9(Ka2%=Ox(r+(*xyfHMth&083J6`YZru5qMp;_0b6%FbCT~y5U0N`z)gC%i# zX`H?Z=<9pQe;?=@dda^R^a$u+UA+9c@$wG<9RywURkd$^oKBA@Ufv6R4Cr$~he*cD zFNx)v07ap;KCl0R*^tb@>0PASqok8~b$x-_@ZPFlss zKu;zoTR}enda*hGwef!Z0`$y3b$4H6rmv86YUfL!j{<#xnT~#@c@x`7WXGDaQlTLO z(&?un)1{tBxu>$uL^~_veKOnqdym8K6UQz?#?#X7mBxc3Fz6NPcze3 z$LVW9F9n@mrZMVR7^nXT^qHWqGSkz-M*Y75eJAMk3G}N3o%-+==pTSCe(PAtJ@w&L z*$2ud#y8f8%BknnK+V$ipPPER2k^t7*xb_PHXznMJ$CeZ78 
zq2CAkRiG!+@4cWe0sU5U`_gVQ>OTN_2k18?&|3_827HfcZ*_OyoIt#7j-HebaUD?g)H)U+=x3&Mq0qM&!jDCHIdT)fU<21MXNTJOKZtMd#`%x|UvO_P<>Vtj^`i)7X1ru7=Nic?YChFhigMmS zx@l%TAbrbE)VCOBqi$Kpa z)9YkgiT)|*pYgSwj9fM*c@Ze;M>-a`!6eZ-YK7A%9$MKLx!XeDiA)=xdDfN5MxAfS#=V`Jmqj zdb0LcfW8TIUqbmyjPj$PztBtm+dywiLMJ)f3%P#+^!d2Y=rGA++H6q=RZDz)3+23n zbjjrGOVB$&Kc6xvISa@lsGf@;CvSj$4(>BvHPy4!s7BONjJ3(DNS91r0-#elW6kxj z??CmCymx|rC)KmxR1eMF+hkqTpI@MyGOVTY&E?d`=`VpE0KL#mkHq`)CF)rV`a!az zpC;Cm-YIpI>KTPK(=@ErXtT!POLM%Qe9)JGo^<>{Uj({cf2sT^=vVfV|2EE_Y}_6N zy%_n$d+2K1D&p;X6?AI99UrLvPq}=TxqmhB{G$eAZI1jj?neK4%qf39=vO5vpT@HS z^bMfDXwFY#`HZZC(3Ea1(w&PZ<>pz_701(2efJ?<3DPCw=U&j~fu3wU4uI|j9pVx1 zk0;*W46IReK~Ki-F`zqpDZd!>Q$e3$E`MFT`~c_|gFeAbH?0pgfj%4b1bYR3Q4ym5 zpwq*T7n<|S=|Je;UeMQqo{T>SK;Hm*(*C34HJ~S3AB_QhIp`D2^_$vX40;{t$@mul zVF~C7_7weF%l+E~I9NjWP9>Lt*r{i6ICL$hv9^9&t*Q@UrUb6bXI==iPuyhC%Y@9!O2xN*ST zq3)WYh0}-TOdmRa#!%;sp`($0#?VZ$e=rJrANx=a+{uKiyW4Tnla79vd>#EZr)gb| zeuo{}kJEqvWmqBLnH4hDVDpPwcQ=&>KRq2SkGPPSA{r)pk+wJK0 z>r8EZTE7s{c!(&#)$g{YG3sGy91Gj zGarQwN3HRYeu`uGc_@5dzsntwLF=?uYA88kNS8j=aq2nX@CL`^KczX|CrWF&rz2f^ zn_4OpCG_;$$hRE$mIL2%;9CxS%Ykn>@GS?v<-oTb_?83Ta^PDIe9M7vIq-i22i7lh zi)OsxmvA}9Rom2}-U{g(v~(#veRI|vr*F*Cg)KvIHC*Wy5L;;C`t%03fMV|zcl#b( zF5=>B6nEG<5trD@!rgwR)BX-!IC>BlSEKDnbBp+AU&~|MJp*JM`x5fPHv{RymcO_< z2D=3mdwIC~TY-pb*wPi(Xog~si0=t-H}R0wa)DKQJENJPo&&)axwz=sbr z$8%iuBS%p23=Kn7KORY`Csgxy-Gb81an)};q9o{t7fQNF^#8-37>|dwst0(Sf5q?x zhOaYxpW){W`!A90K8fKth6M~~GF-&4ieVGO>lxm`@BxOuV)z2X*BQRg@N4woB|(V^E*A7p47$=kCC^I#lpH1I#+jAKehYna5Y@-x1EQ;?`oW7WEM}Q+=>23qc&^aLg(~yC*lR! 
zF3qui4XVo!TeX>vRk(MoMct*iS%Q$}$fO4)($WSv31keQrl(~J1x|DHCq++7>qlq9 z=^6b(by3jzxse}9N(eamlP$}j!!O6^X8awPu5EDx;?Pu_Py}$7rqM$-nU0Nw#?`Z% z{%|yOe7&qdYce#dEz|KE+&h}_hk9up)4u@6ok(9mU3m3Ufe~HKke%#TO(hhmtU)B8 z%kiUk@wdNU5XJcfMVE;vQK-89mmAR?hz_`eqKiJqZKfN4UPBld(j4EVW61*ugMP;{ zbAn@G+O^sVj*7Hv@b|K`bs!FYgo$M+MH`aYCI0>mf0^+^shDBEpgMH=v@KoASUY}{ zbruK|h9mD-z!6jNXC$?JB(+!k(MD|q(s2>Je=zEYbem1LCx+>EvfyZ%b~PfIj?MVH z9rukQH9&oNxf>Ob5u!jYbNTISfyk^NAQW4L4N(2*yk4?S&Y7N(fAG1>_yOhXjH z$hT0#jC?J9=+qOwb3)0;OHWC|jM8tMRx(oLMu`ji+n0x$23X5H$tVb=#w+NT?YQ(5 zS;cv$P(JtQJ7=8YKo$M_89CC>z5(%;fLj>PlPS?aU^8X=DAAyn4G#1R&u8Aa0gsoD z{{9R!c`%ULaEA-YhYUj}sr^G`H-=?vHnqcba5zdvQu)KrMqfvq=r~U_34Ei>qvB1u z^puNF!El<3Aq1FfWzS+@(=?i+9gcqv9fH3Ow3D14M~@N5Gn7$ZIEpc+B-+3$ZEAgP0!*MPWrD+Hceow z!7Wh6KmAfiY2zgmangk54@^^YK>8mvM;bo+@3>3UafMwS6B{GpiLjr|rd3x|EH`6peeFk~^|#gEa&%cufLOzr<~F|n+q z2&_%QN{G6+av3_bh3b|ja=ILvu5O`m;>yrIaV-1^M|uENfJ3WLx0Rg8p)FIl=CUe| zqO8kQS<8*Gma1EGS(o)FD~~f2GK7KsiMn586jsi67Jzk-xD;BMIHl;ywkai7=@b;t z9w34#MqFg~>2iv{nszor<$tTjs)jMy0M_=XcHGE*uxiIwH@J;FM|(rR2UD2VGavoF zPK7N#U5nb{GmLezX1da~ewtb@o6uP6_Se)pIS~(!@GQ3(CGOMV``fKU)3K^m>sNE8 zbZwBP*1?JR6O8q8B7U%@#yJr`MBC1MPQ(w@b}x0CQQ|%wFTUVg|dD690jeU$n{Arr=q2*?jxKGDZQtEem%*1rzy_hlbK3zMrClZ)+?W`nt zXA->9#zbNRll@48TphN^ z8?6Be?_B1_UHr<%E#a;EZNDmatULdQ17&i}XF^>VDwu z`umVbKU~XXc~7RAG&iR)5KfKIKazF)=Zw*=iHom2e9`%8@9v{~XuG5&4Fr*Zmaj31Cr z-06AcCc&2gPwgze)-5tAhv2V_uU;qd;Hh*3BSM9mFzr@epB=K00i|db! zf8l!)Ka252;6K%?=A#Q3fA#=bPStfXz0#KrjK4Y{@rwUHVth99;dCzNHO8M)E7Pm7 z_?+?ijK7uBp9Y1Zel@R_=~eqTFn$B`f4(Z8@tS^0+Adx(sqM!p5)oKBspu-&B9k!vTq1JEsFr^&VU! 
z>z&E%X-ks+Jx)J!g-lQH;L$Yz4W@D~t#^y~Wbqg8Nicrd28nkue%dhm_ACXS%6ZG8 zcRyqNhZgyKp7D?JxJ=;kbF*YQgIFKWVf^)sf3Hrq=L*JO3dT~sSu9uijK7!hI zHxqcO_lYLA$UjRKqx~oFs?CfNAnPQVzGAam#Faii2K;dRQ+{;@XW9$AT{&-ZIaYgk zF*XkE_^^}lgEz=}$v>m(L&l%Ud{*Olu9yd98uQ;vfX}j-A7=wk^}fLUQg+}$;O*M~ zBBy_1rCSt_o{Q@c<9Bd5N)97vFqQw?HExm4DgHuF8K1*;Qpx8PxS{lKRm=1We<$Ou zeE19F7qNcb%;ld9hn33N%=}UPnh3mIzh-iJt35x!curWqgeJ zgH1Yd{gUzY>0P?Ui@$hApYiXoJkMhMv@>M+4{vY_Od;d{$@q6IdVR&2_WfD|{BYBH zCyU$jec-A6)jWTp8RF_-eE(Xvh@<=BI?Q;h9UgI(EaxTeuS#FQ_%`Ohl84I}|1#^l z8kb)){&ODpRa|ejllp~q0nbxG#!myD+HbY1Ux@Ua$OKBymg(29d@8$f596(NX*3Kt zmA{zBMakiljNixnxryueAI5*M)-5nT#!os|mNVSa{@*aZf%}`y>HiG8oqYb4(_8(? zTgS-qr#8!a#k-~`>pjMI)Jwe5i_tJ#)UPJSU%~axV7%2&zMSz^IXr0`*^|?I`2Xhs zPvu+vrJETaW`54#djAT%U4LhtC(CKRMz;S>PQQ=wKW4s7WBezKf0_BO#_J9!nqB$3 zfv0xr4YGWCmzu6O82|KjZV^ZK#dYraGX2$T$MP6|72|Jb|3UdbLoSf%t@BhR@J`Na z2CF#zxD9TBR{H)L;~%(Q;#I$jFO=nf%KE!N%a zyYV^;cq-?Sjc$=}9_PA)@%h}}#f<-i@ds;UdS#bJPP8w7ViNp9;Hln&8)P{za`_K2 z-s*o|l_SfipYbza4>R73qx+5|_!C|B?c4@DwP%_|4qr`@eqgRl|250=P;Nj2@Z=9z z*YB%1{e4_M)KgsTz>na2Gx#6Imo0Uh(OMc5gxYD9ht-U?+Pym%Z?*qJ^6l$&Cc#$& zPxUSh$o7olR&7X<{w_{$^|$vYNq>;jTi5TGOp^7!%K9>s>)pxtPM+^zF2wZ_<7czo zn8Wy%0$GlAJ@OjxcKrW}(=T8C*wb2zh$mOYkyCJMcE!ReGX==5hKxUb^TT40IJb3Ey)+{6}1lb-wbMW9|^ zQBz;(tI-3|`fx<|H8yMh`r3w?U^E!WF38K9Y@bM{Q#wA^3)V%$tF@{yz8t3q8f$A; zBa11Pc+^KpgUvpwA=}^3py^kXM@wB(8bU#Ta8)Q0)Wun5xY3UzSRxhTsIZ<63pr&T z&O7VoXTjpX<>*^NKMra+K6P9%<4&UQXaG zUSL!{f9A|`PlaAFeMYH=GF8kiG71Pp>h)@Lu*UXa)pXJ-o@CJU;>+hvpIcHy{hw1F zEszJyg@MMfU{y&?d32JvEh(*Wc@T2cvk&<#>UpnELVu*D~dA>e-!5sVrH;XI$HLhH}JTCQB4SH2Wjvi_7 z*G#Ue53llt10@wXx~HnaMHv>9PtwaO^x}s3<7r`7C$)~TiveC6oP^doGDiW1P4U1p}x*ve8$@;ARiUqIX|2nU6P zg@aY_D(d{flJdzqD`tvqQZdgdsZj9ZTs4Ipzp-#lvH9q1iDB29V4&r|8C-@{gmuf) z7v;Kg;fDDFtH}%VuY{(VY~D8*jsjs1Emm_j%p~k%+%%#@R}M|daAENL7OVv{Noqoo zUQ%AHLvcOuYiRah)pF$}n6yM|W+xUT4>cweMR622C0Iv~r|F@plEslQH6#xXT*NS# zXbXIZCSOfs&_j(<)?8XO?gz~83YrsyyFxC~>?VByd#^<`zDR^zS26XaY<`}sY?YV^ zn|PWd!SJX0~DVYlZ*yzU&v%Ipr+;~_F)d78OalG 
z&m?Jw42g+1sE5o#6wGd-k>!|$EN*)O<#`aCV-+yNDyeBRE@1vTNVyBNY{*Pj1HBCT;)Tb;m+cd-O&(qy`;QAN23!x zx!G{qfLAt2sXXhuuvTblf$(SGzEf%}h{z_vafd_GSVzkiv3?5%P+J2$ixO0l0@nt$ zqB@W05DX#tk??@2Ny_b$c8At^%y%8HnBe6eX)ur5(JYelP=cEo4RlShw$BT#*$F&9>V+0WcEJMD!o&r)oT>WMCzDTh zOxqCT>6>S}2-`lZp%jgk_CvU=KCw9I!)z!_1{R??I%Z~=5ZDTuLVVF+b0}&x6LqU< z%ft?ed8m3!lhOc-34%N+F})>P(L^`Q7^ZyN_3&g`52t)nAg&Cw!if)8;v7Y8x|Q{& zy|ol<1I24Te&<~-DoQFyIn}Nx_S0xjw1T`>HuO>;kH!h;&A=si1BNS*u+S&h#?BQe z$J+#C^On?7+JOG40(QPcWAx`1WG22YgW9>Qmy zx#2AREB0+@U;lXbZQ^QgHxLksqTHj~5jrcXV`SSrq(OH3#+jofywHdx4!eR+?3n7wzCA*N@yi-h1g5sWd4CM7DL~>q$`^g zdL6WFydoIYD;nabqOu!igJY-5ialiGZI%j2R&?L6REQsQ@ew=ZQY|k3H`eW9BPjW{ z(KoEy3#CsCml!WQE5{fLZSkLl@Zzi}esrvMU#t?HTKi+Se(y1jfAgZSZzG-B?Q)k3 zmQ8K=l#qR}-JF#x0i>hz`?x_fELp9w65w-OTV0-F{Ia0Z6qhSnjZH}T?8Ox0Hv?@q znqW6QP4>rK@l#YX`*{3A>@(6nf!HM=Clb4n1z|i|QD2KkJp92(#5h*fqrMd$VX(0` zS&NmrQkz&;xJ|BO+8%$gWbpC z8k8KRYoJ#`a{1{Waq4&6o7naTphhR+2cRi+;R=nt)Y`H+w8JL0Nc_ocyv;_5)OqaT zLAoo+C4=oEC%lL0hdv)Sldw9StLFKjcm$0sjNL*M4S}Y?3e@7TRvJS?3e6|}dA>kE zY+dK*QaF#{7`L~%=3l;c6qBy)rjBhtU3v6#uz03LPUfCUG1i58aS(_;K2kzYl+ea4 zgoeDwzL@LloH>lOb+NAeJKKr0&r9}{YMiNr<{U%u%X^xt$Zl1?_a*XT_aBJVc*ml zynL!Ju74Hwr>YX#s({Q43Fg5X`WKElrE#hlzM!}1!NJx0D3$UO-%Ms zp!JCxJGRQlS<7ZFD6cQ?Ij_O?<*RK`b-XT4#zIwkU45|GA8f$)n)>p{9wt&4c`=d6 z?NK@w9_{ zPPmdDBxv{ zJAQ{b-}InAZ82h##;S|@isoh=CyJ5!I=tNxU9C6eSX4SboPC=E`g-baYlKYa)^dVK z@^t(+%u9W26COmcXqyy^V^XL*xZFL40Zz294g%KmQILh=Glh6yGtyW~`xQk+c+Vn7 z7`i^q>?8wP{B zq*FTbY2xQYc&b0qum2<#n@0$#i(J_=M5 zJqpLPlDuWpV{yDy$q%i;rxI_&$k9)kkyo}VrHe-MrXpVCfiGcu>ZX=I$NC(4Tn6Pr z15#&w9;MgCbMi-oP4Bna?!YLmJ6dxZ8GIJVTAI)2`qc-mAg`r3`%6P!rX%#Yt5*ZnIa^BZ>#`@K<~^HalD|ypil+HNSvb9Y7%} zhSL3hotHS`E4s2U`5LP%L)~BHM@Cfas}1>)uRcl@0z=MDHa1hWY(UsIIDB-TNvl!@(vMqc8iadvHh{gX{ z^Ls(+hoI`;k>*BB!*@>w&`u{JuKp4jzn-AVDF*&?{HBJ=ufB`AhdPC8RFeFXil0rz z%?Su9zxsY^$6%QvhhIjg;)r*d5^a2kb`j#~2r9qvomG72g)a3wB{C+doI zM*G$GSPw6hsW$O|ss5{a6kLg9^qV6puD;va{9T!Uw*+F0h-a$(jMmoRhVUxC`o8P< z#WDpx+aNAgzsf(F@tXiC{|t_+-%VM}`PJ{NP=5NRhc*9Az)=@f`RaSI4Oaq*OMM4K 
z#$;XUo?r`tIlua@>~_xYR2}0?Sr&MkCI4>zzQgXNGNEcOXEWNb;yW$*)pr5gIDam_ zYhu1s{R($4Zp`gh-?RPn2AN`@O309SZN7en2<11rxy3EXTO>Flf$`VcpNBv(=U3my zJ;M19kZ_wWYyHoFX3np^quaSvW?EwDu(kddE&0{=b}uHz;_4KCq9kD z{VvhI2W1{h^*zS#FGx1blwbXB(SFWvZLc-{3G!0^RsHHaznw>!pT>t|U*%Wd1CFWo z^T3k*p-a`T`n3Z%bN%YO!AE{6k{jt|ES_JLL%)k)&aZwaN!u-RIEiSwR6Yd5 zFPLK@TKglvhigi}w`C8@o6}Us7_aJ8Ii@73f8}E`1^uFl`LaZ{SxNH$^;a_2`<4ur zs8-JTt^9j+kIdg{7#0~<`4wEo`OR&ld;Lj?IZY+x1eT~4N|Jy5Z)AoJ&ZmHt|HDDB zuvjELoj(A#%2J^vDu&n6MUlVkDOqFlXbH~)C`M3n&uH=cUv~BHdrju6m@1+AYpwtP E12BGJfdBvi literal 0 HcmV?d00001 diff --git a/thirdparty/bmt/build/src/CMakeFiles/CMakeDirectoryInformation.cmake b/thirdparty/bmt/build/src/CMakeFiles/CMakeDirectoryInformation.cmake new file mode 100644 index 0000000..c883c69 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/CMakeDirectoryInformation.cmake @@ -0,0 +1,16 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# Relative path conversion top directories. +set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/bemdeppi/ham/thirdparty/bmt") +set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/bemdeppi/ham/thirdparty/bmt/build") + +# Force unix paths in dependencies. +set(CMAKE_FORCE_UNIX_PATHS 1) + + +# The C and CXX include file regular expressions for this directory. 
+set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") +set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") +set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) +set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/CXX.includecache b/thirdparty/bmt/build/src/CMakeFiles/example.dir/CXX.includecache new file mode 100644 index 0000000..065d1e7 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/CXX.includecache @@ -0,0 +1,36 @@ +#IncludeRegexLine: ^[ ]*#[ ]*(include|import)[ ]*[<"]([^">]+)([">]) + +#IncludeRegexScan: ^.*$ + +#IncludeRegexComplain: ^$ + +#IncludeRegexTransform: + +../include/noma/bmt/bmt.hpp +chrono +- +cmath +- +ratio +- +string +- +sstream +- +iomanip +- +fstream +- +type_traits +- +vector +- + +/home/bemdeppi/ham/thirdparty/bmt/src/example.cpp +noma/bmt/bmt.hpp +- +iostream +- +thread +- + diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/DependInfo.cmake b/thirdparty/bmt/build/src/CMakeFiles/example.dir/DependInfo.cmake new file mode 100644 index 0000000..2278187 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/DependInfo.cmake @@ -0,0 +1,21 @@ +# The set of languages for which implicit dependencies are needed: +set(CMAKE_DEPENDS_LANGUAGES + "CXX" + ) +# The set of files for implicit dependencies of each language: +set(CMAKE_DEPENDS_CHECK_CXX + "/home/bemdeppi/ham/thirdparty/bmt/src/example.cpp" "/home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/example.dir/example.cpp.o" + ) +set(CMAKE_CXX_COMPILER_ID "GNU") + +# The include file search paths: +set(CMAKE_CXX_TARGET_INCLUDE_PATH + "../include" + ) + +# Targets to which this target links. +set(CMAKE_TARGET_LINKED_INFO_FILES + ) + +# Fortran module output directory. 
+set(CMAKE_Fortran_TARGET_MODULE_DIR "") diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/build.make b/thirdparty/bmt/build/src/CMakeFiles/example.dir/build.make new file mode 100644 index 0000000..d8157ce --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/build.make @@ -0,0 +1,113 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# Delete rule output on recipe failure. +.DELETE_ON_ERROR: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/bemdeppi/ham/thirdparty/bmt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/bemdeppi/ham/thirdparty/bmt/build + +# Include any dependencies generated for this target. +include src/CMakeFiles/example.dir/depend.make + +# Include the progress variables for this target. +include src/CMakeFiles/example.dir/progress.make + +# Include the compile flags for this target's objects. 
+include src/CMakeFiles/example.dir/flags.make + +src/CMakeFiles/example.dir/example.cpp.o: src/CMakeFiles/example.dir/flags.make +src/CMakeFiles/example.dir/example.cpp.o: ../src/example.cpp + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --progress-dir=/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_1) "Building CXX object src/CMakeFiles/example.dir/example.cpp.o" + cd /home/bemdeppi/ham/thirdparty/bmt/build/src && /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o CMakeFiles/example.dir/example.cpp.o -c /home/bemdeppi/ham/thirdparty/bmt/src/example.cpp + +src/CMakeFiles/example.dir/example.cpp.i: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Preprocessing CXX source to CMakeFiles/example.dir/example.cpp.i" + cd /home/bemdeppi/ham/thirdparty/bmt/build/src && /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -E /home/bemdeppi/ham/thirdparty/bmt/src/example.cpp > CMakeFiles/example.dir/example.cpp.i + +src/CMakeFiles/example.dir/example.cpp.s: cmake_force + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green "Compiling CXX source to assembly CMakeFiles/example.dir/example.cpp.s" + cd /home/bemdeppi/ham/thirdparty/bmt/build/src && /usr/bin/c++ $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -S /home/bemdeppi/ham/thirdparty/bmt/src/example.cpp -o CMakeFiles/example.dir/example.cpp.s + +src/CMakeFiles/example.dir/example.cpp.o.requires: + +.PHONY : src/CMakeFiles/example.dir/example.cpp.o.requires + +src/CMakeFiles/example.dir/example.cpp.o.provides: src/CMakeFiles/example.dir/example.cpp.o.requires + $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/example.cpp.o.provides.build +.PHONY : src/CMakeFiles/example.dir/example.cpp.o.provides + +src/CMakeFiles/example.dir/example.cpp.o.provides.build: src/CMakeFiles/example.dir/example.cpp.o + + +# Object files for target example +example_OBJECTS = \ +"CMakeFiles/example.dir/example.cpp.o" 
+ +# External object files for target example +example_EXTERNAL_OBJECTS = + +example: src/CMakeFiles/example.dir/example.cpp.o +example: src/CMakeFiles/example.dir/build.make +example: src/CMakeFiles/example.dir/link.txt + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --green --bold --progress-dir=/home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles --progress-num=$(CMAKE_PROGRESS_2) "Linking CXX executable ../example" + cd /home/bemdeppi/ham/thirdparty/bmt/build/src && $(CMAKE_COMMAND) -E cmake_link_script CMakeFiles/example.dir/link.txt --verbose=$(VERBOSE) + +# Rule to build all files generated by this target. +src/CMakeFiles/example.dir/build: example + +.PHONY : src/CMakeFiles/example.dir/build + +src/CMakeFiles/example.dir/requires: src/CMakeFiles/example.dir/example.cpp.o.requires + +.PHONY : src/CMakeFiles/example.dir/requires + +src/CMakeFiles/example.dir/clean: + cd /home/bemdeppi/ham/thirdparty/bmt/build/src && $(CMAKE_COMMAND) -P CMakeFiles/example.dir/cmake_clean.cmake +.PHONY : src/CMakeFiles/example.dir/clean + +src/CMakeFiles/example.dir/depend: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(CMAKE_COMMAND) -E cmake_depends "Unix Makefiles" /home/bemdeppi/ham/thirdparty/bmt /home/bemdeppi/ham/thirdparty/bmt/src /home/bemdeppi/ham/thirdparty/bmt/build /home/bemdeppi/ham/thirdparty/bmt/build/src /home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/example.dir/DependInfo.cmake --color=$(COLOR) +.PHONY : src/CMakeFiles/example.dir/depend + diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/cmake_clean.cmake b/thirdparty/bmt/build/src/CMakeFiles/example.dir/cmake_clean.cmake new file mode 100644 index 0000000..953ec20 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/cmake_clean.cmake @@ -0,0 +1,10 @@ +file(REMOVE_RECURSE + "CMakeFiles/example.dir/example.cpp.o" + "../example.pdb" + "../example" +) + +# Per-language clean rules from dependency scanning. 
+foreach(lang CXX) + include(CMakeFiles/example.dir/cmake_clean_${lang}.cmake OPTIONAL) +endforeach() diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.internal b/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.internal new file mode 100644 index 0000000..de03e59 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.internal @@ -0,0 +1,6 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +src/CMakeFiles/example.dir/example.cpp.o + ../include/noma/bmt/bmt.hpp + /home/bemdeppi/ham/thirdparty/bmt/src/example.cpp diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.make b/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.make new file mode 100644 index 0000000..ed8b29c --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/depend.make @@ -0,0 +1,6 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +src/CMakeFiles/example.dir/example.cpp.o: ../include/noma/bmt/bmt.hpp +src/CMakeFiles/example.dir/example.cpp.o: ../src/example.cpp + diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/example.cpp.o b/thirdparty/bmt/build/src/CMakeFiles/example.dir/example.cpp.o new file mode 100644 index 0000000000000000000000000000000000000000..9c6fc5e5d73fd03928702fcf95b27daadfe3d71e GIT binary patch literal 87416 zcmdsg4R}?>wf0U9X!N2)MMaAmBPv*=35gge)$nulL;|J;6cs&$TWe;`?6qglB zze4c}if^HK6~(twd>h59DPBYI*C}30@f{Seqj){VcT#*8#rIHrFU9v$ypiGuAl^jR z4^sSXioZkgcPZXN@k10pO!4q3N%7MZKSS}4DSnpX=OBK8 zuDc+9k*;5&cqhd#Q@o4fS1JBE#jjEPI>m2L{3gY}rua7$cT@Z}#d|1zhvMH;+(Ypn zD1MjX_bBeA_Kc*Nf9M(CkmKYDA_$Z2V zC_a|rVH6)vaW2ItP<$fABPc$F;*k`ePVp#;&!jky;(UtFp?EaK=TSU{;tMDqNAZOe ze~#h;iVG<&qPUpii4>PmJelGt6kkGdDaBJMo(A#USo@zUW4G>m7_Qr&tNZxy;aL0A z3ov>m+Fb)qgn0K*j62s996o&bd7$%R?XO^&DlFFiGFDGdV9$#LlR(p72{L^4`^0VM zno^V;VC?=`W$=(ZrEib4~+$v$e(T1#!_GZO8 zqEcnn?%3y}y*IxI-0>i{xb0k10jSGbOasQQptS<^trc?A?JLn9%=z9pu;n5*CA_kx 
z4(?8aq*&*DcnRqXM7nRcLX=zX69SZapKf)u$H-sL%^N^olMs8K^sP@$34Il-;8a=Y zjIIJ3WuBQ7K>HAZsmks^>3%T$gf;Wfe;qPvk8X~2!hfO;Cqqu68^XyC3L&bnNtN56 zc0vEa@LbX=83N@TxG_pOY*rVT)fF~NWojEFZwn_Ys{=JCb5`S+0u{=vpuBapcmMep zNYwrk&=rUpr<~WV?3Ib7EOBo{Vu6;}ZZK3>7~&k8B8BkUwReQT{OY1KaSths=Of0n z-?|e!xa)6t3H!U)>V3KGD-X4|9$dZhP*&^tx2`;R_^{`-p6T~uKS4K%;`Xsu1@rSr z`U3WXz1zTYlGwiTV644$f2<=4|8pUN+gz50K!7xR@H*xmX#b7scsk}DY|lnqmTg+Z+#aPH z1muCX-uBja+voOx;NFhXk?m11xyM)88(aP2$o7|G?Y|YnRL9(&j_AAGCX*m~C}Pw?|^DcaBu`AAJkdqDIOI!0LT5VAu|IcpIc_ zhm=@542PZYUl|VFK?ihLBBcjXdUVRzmqb&G?uvEJh5y*;`J+Gw+^*gkQ>s_*D*#2^ zplCOUW70uhM-*_?)j&h)1Q&Kf>`Nm*u)2ZvKq(uyofWyD7<{Vh zmP+pyOq~@YAsu`iN=M%#f1WfvlnymXq>q7gaH~!i@anDtol!{^0~|~bl66)T01Ua& z@ZKWq*^y;+7vxx$yzI#(=*wkYxvWfw4`-e7Y!*yRf0Z?KO}6*vtfBj}yl-U#pn7<4 zp{(UhI4C;^%CuZv-he=JX>UbG^e9uXn9y_JF;N?_{DDZpS^_0|MeVqr{gpyUAM`=!MO;_?XunqDCH=PwQnZu&5$ocbT?6UAi)`xAZK1g~G zQgE^g z=0${F{9yPzXm>aE=UFrobl(7Rpjk2f)3M2E{W2i{TpN8D(Oz&S{Kv|rvq8BVNg5zY zHw~@<(q)cTAIxgagWe9ZM;-Ar<95v686*@E&}CYnWb-I_!C>!&a>* zIC~NGy)>6qc-8JoMU4-&Uu<_P4|=WV`dyN}cK!JW*rkDdEUq<#dps6^OT z6mTEv#v;2^K?=pK4e-6KDd&k!_c-i3KNxPJv9bF3MP%mGhYnv`ze`uv)SoNkdVV80p z+Kep;`k8RQn@aDn8yEvczj|LOnA1&jn{G>&4(fJ;dva8@bMy*5AbA(MahLAV(d`F8 zGK@hmg%NWWU9&MKvYNC<(Yey$BR4%+vxU6X|P`5b8BFXX}>Q5&IG=v=c3({=t4@5WEEwD@(ne^y=TaUyd;xVkFUo`&aFGK*kGuE z>D=%Vn7m#4X!gd4w=F{RxT)7ny?W~0sdKP^W3Y6LwLiD(BUJo!7eu!5-3_>3zF;pB z^7-ZJ{aLqnCC96j&WaeAj$BywEJuN&Q`D5ZSo@mo$U)Nmu4m0Q35%#*Y-@sazD)vc zm53fBT4A75gxE2WTaK*b5+cewzL$~+v1u6GDO6UJcRZSs2(cfdb_I1uLRLJTiU`=g z!CgWlpXD9TrNj)?E4ahw;CfiPUqqH|LrOM1zFBgKs2yfa#=aaSn_L?umxzpMD7shR ze$5J0C+MSPI=Lg1C{2-$4kgOmITD3qY+}pXg{x6@A2gyY`=Jq4rXLznGy9~8wTlMO zH%-UWqMcEGpuO!vr+41B8zaA&WA7s&=@r%ca4gvS2syXz#6IQvsq?4aF!jc%aoG!9 z3SH0gCystD$NP1TT21PqH@d&W`v5i*VlXL$LWT`Jnup-<>>OuPpUyxBM2l9xp}^!s zZc(Uh2RRdCHsM60-#w7wxisOuQA~J`m`ylc7PDft681dk+{>|ZFF)~t9PjR|p+C>@ zwq*@{CdXTqJ@nOMysqq_hjP4kvWNEOc&|ieJ$;P#Z$nPpaE$lsAw%K%y&)%VImY{N z=s&XH=7%{?!FU1k{Qr^DvQ9W3gwGy&ZC1;%E4>cf6aXR!ha=}^ot{6+^KQ$!__x_v 
z?_o+uB)U7|y@LacBm%n7jsZDe&vin@f(uHK(O8a*VPyKH>f8jPMF{D*MusUrn;TL( zJZaiJ{O&Fok&g8@nn3_eB4t(91jBE~$ESfLnC*#sTw*J7v7sWTo+O|hOck4LAoF)V zotUrtYLE&yJ4A*gKhF>#xY{EkFE;kYSo`kSPY=STKVj$y5N9yK4xU)W!$fz)&#+8@ zswb8ud0;z0D#hvx(*6+fQ*vPqI>uOo>eeR9L+-a9jCBYx5QBx2UQ8B(Ot?`c19RGd zQAn2qi%D5Znt~^5rhAQX<#o8SbX2r5A|ay6@dSz7MCi0Xtv|r#)L|p#sFf zauRlF``p}{i(6Cy4`pVPFYH0x$Aj|*UqYoKdziI$)s;Zo#Z7sE=>ms=5O%=iZuWke zRZS1g0$R4%Q`PjZ(o-;cG6Sj5@{HP%3%a3Z_L-u4q+ku#eKWOi-Xk$Z{wNAsbkBl5 z1)^3cE+}hTzu+7#j65j&9aaW3q9*7egOx-yBRzV=z_4zyv(OIpNj?$i7S3fYPYnpV zynZSKcZhvi;G$xI4W+#ovpXrLV-{3y?JRnf@e;+LnwaJGyYfQNU#nN{_mZcIV#5q| z%mO!RiMrVYdDwFrQF6c|irwJzpR-VVlTT=I4A-W?-V^Zdf;{)|{-_!)~`6T%k2rY za*)*8BYL0qS0>57Yp^R`pqRQ{B_ zD#ws5QPXj#fErIO7L@~%R87V{wvUQ|jR~ya{>Qyb^-6-Og3PC7?K}IZjpgls2qE3a*6Z5x|cJicW7 zMT%$yU1Tl4%FAlY&HBu+qjNUE3;}ik#D7f2Yj5iDxnIkke%w*l00CiB5QlIA{!!`W ze)@dGA-MQg$Ydw~CQ3gMVQTiC3UhV7OLFsusJ-Rw_HIN|x9!j^B z5AqzM^e-9w*$aFfdGzvGl+$FFY8y)_9rfuvusX7U(r57WTYUMeD197HUo6w}0NYII zSMhYnV|L7!fk|1*E6PO;K01Q85mk>z!wvq&fbWbC!3*VUkQ2$v(s>ysSUqn%r>XVz zjHC2Rc=|Fwy`0id=jn6(^g2qvnWtwrYyImey)lsfC6S&7*j7q^2g{A;;VnN8uu&(1 zt<15E_EY+2kV?tG3In&8o+}(Ae-EJ3vp>={7Erp2zoMV?CQ5g;=?#>Qzd`Z!uh;sw zQTqBodb>{FP3dk$>3a#BeeGQT~xdB>YoA=`Q{XO1J4B4I;Nu zeSX=ji!rE+KbGZ)ZK0d!TEJdmva+S(&dB;9okQ0hwfgADjYD+4c9GsZ;MI0tFOr4b zy_9}APtTsF`E!oK1kbazL-flSO2=z0pSANCrO)K)*_UbgOZt(&meOr)2mRJd^%l!7 z;1BfMAALXU?n6!<$SDxao;Mw?19HA9eT6h5j)p%F@HZejuh>@<`|YfRXk25*JW9U_ z1*x*jUZnleM(MKx>9SKpA8(}eBA(tLZN&8L{iJtO`gxqc%I7~w>3KYz)t8ajAwakc z|7iQCYWqtl-Bw;`-#kjM2=LF;{B4x(s&5-9{qg|6UthOV`YnO<rMv7uNa-&7 zM;?QMJ+BJ?=={&u@=GY)#Xql)^gIx`o$Be8a2=?J*;i;Cn@EPMj_siIu^>a+SR>fI zJb*?~8Rx@wK*tS|MzrN)ArXSBp5#&bxk#k^K=Xsq0Ieb&-+=3Y4jiL9eLYJ^PBE9$ z=%=rx^b(%lBGXY%FX?&T>^HGp_W12`H_7<|m(%9!*-z=Na?AY}Wb`~+e$ihAl3r}Le5|3^Y5l~EORZNmG}NW=>I7Fvs}LH{>6CQM(Hm9Y@~E&{wdwHw&%V+mWc842u|h;2t>{Arrf=#eeFvqx+QJ@6 zcg=+lQM!m!9pmf2;b^?)CGd|*r^ON4H-*w&ea(DIKbiAa$U26QE#Q=6i1**{d#P}U zc&%X&kccN%6rr?;*A~JMh;5#Jg8r2I7c^zvK^xh_^8e0uu2aaNwI9_=67o 
z+YbCY4*a_ge2W8r$bmoXz`yUnA93JY9r&XT{4odqxC4K}fj{ZMpLXESIPf1k@Mj(P za}N9k2j1ntUku@60LfzglIjO=y^Z}t192$4obW~i$J>ba5~0B;;_VCqbQkencHp}l zxR?cn(jwl^Ll^=P?==VhO9%eC1AoJTzv;k#?ZEL=ig1W{-C+=ri1)Sw-{ZjFap1pq z;5`of4-WiY2mYP|?{(nsJMeuD{4Wk1Pu&QIh=-?eghRwT5C#E>cz8-jI7GaIVGxjr z_YVjDPY3>y1OM28`|tH}f{4cst6>QdaY_gS=rZErsUhJI@p2qEo+=U!5f4up35SS> zr;da}#KTia!Xe_}sU+bL@$i(AaEN$#YDqXmJUqoD93mc`Y7!0+4^KG>hln@IfuHHX z^Bg#yk`fLP4^K@Ahlq!#sDwkr!&6nlA>xg3;JAYx4iOJeVF`zbho`cHL&U>VTEZdX z;i)a*5b^L7mvD%9c&bY{L_9p@B^)9ip866F5f4v+35SS>r^19o#GB&4#g1W!A>x%f z=Fy!cpJn>f8kIGh&2eMMZ6gy`8Z;t zoZUo|Lor4X@A446#A6}+Z-Vc5@t6ORE&3D=iRQS`$dc&-B%I}V{V7d2QGGZLTM3?w`M;X#wq`XT}8Zb`@&=xzZjK&8Szf@Z`c)#YwXUyjCkT{pbyX$ zjCUanfrxj813%M&pXI>w9QfG|Jl}zzDc7)pb2 zIfNkq~U4!hwI$frsZ0NEq?L^9PB~ambnLz^``T;dzLZKhHsr zv(0dbc-MtNKqB7t4t%}?zrlgu=)lACB_xb^3mo)S4t${luXf-y4m{z&7di074%{`b zt98(S$${VG!0Q}%y#sG>;EfKv$$@{_fj2ww76+aT;U~b z46IY&U(s?D{YwI0p>ak3p}=p^xSFvY7WgWStC`tpFy+l>&m$CGB=Flby_#9h6!>b5 ztC4t-z}JNETLu1gjVt-z6!=<=tCq7<;CEzZE8jQ`z1|64)-rlwar1!ITV z*;DFFoLO}a0cM{-jf=?ldfh7{ArC}M^{Ik zg!Ezj1%W@K>DA1;4LYuTFN_zSjQEc=y^?>uz@OE)vS+=(pVK(LM~PuO;I@3>lGuiq zIPew+{sX}Cy)K~(|KNL>7=G!X|Gy4=?5TEpW;pQK4tyTq`Rw^up0LLA?h^P;y7TnR z`xIE}=d@{7cEngNC2ao`1ou2rt0j6WMzU zYX9;YAh4B7l>^`8z#nzsKXKr%IqM8ZneV->aUc_6xxf#E@Ye+X zhQ?LDeHu76pFQ7!?oNbT1pW_AKbEfE6!_mYekEOI|F*@*YhoxhX2QQ&{jxGI+kV0^y!uEs0q>Un{`r*Tzp=beM} zy&Auqt{!&a2L=AVroV-*T1KOsF#Z>T@6+^`(bd<$;rZTQG_LF)3k75=$GZT}XU`cG z{RaZ~pQ9|%?$9;h;_(0&{`WzjDa>Dg+qrm;=bgKURr@;N^ajHJVpkn8< z=Xc7^|08h!IURH_B77)t|2dtiKXWfYIsS9HLZW{YaGT%$b|E9qG=YYzeS>usD++G?49xlh% z1)itdo$~W>pF=tRbHigv??Qox>(6F^`_B`PCwi{{<@nDNmHdAbc(@$@D)95PJ<~|e zRZwvG>^UQ3Uxc3mZY#%Kf?mza{c&yb1e6mlmk$N*KbM5!6XCcbr1zgoDnI{F;Qn(- z<)5RWqs#Zg{)`Gd?9T@T?mx#IL3%#~+?KE0i|qI*fakO4p2|Pp5_s4Txf7Aze;%sJ zt3}}9eC-mr|D06yOJgoZIsS7}Rlbi1+<#6wkMeaiRQP=MoK(?&S>XP2Qib;jT&;dQ zPqp7ElTc2nt|upu{PhC&pOdP5{Yl`{G<|~T>n7X%nRMXm5YHxN7X7P({s{;EJm7hZ 
zUtcELs^CcX#Q5r_CNF+nWwN|*Qd4cBI&pJtOCkMA%ZK3>(lSkg?aUEI*voPe9+!Z@5&4`cfqwR+czkg~YrMLxt$YIT)+82Hwbmu$pyH-@Q)69i^|GiJzr6g)X;aHVa_ho) zS-idq4$O~VcU^fQ*tR&iq#4e(k5^TTbMl*(Mkf@6(PT5~DsE`3uPRztpS%c8#!oa) zzbLvi02dYr(_6s$7D&8mW@Sr#y^~@Fu~#N1%Cqy!X2WImlIF&S#*&)WW|8f(n#$xw z;>(X=b|o7m1H_$POqKUTmfVPzh{IQ^U>m71-rfCaW5tITyvt8e8I* z)h8F#RV{9*#Eeuzjd7(^H-=Iq$5mH_5=dCpY-bYFb7fPap}w&uA()`Hmehh-*aH-| zB$A7wVc`YRROqj(YZDF0+C{b1phvS|y*2y#35KhS;#W^7oY0blv13`h8i$*rhQ^zX z0fEMeLJ9k2Tv&hu2#im)4N0iU@vyKcp#C7xXHAMm3nvP3Ew%9Rmw2M7v3iMNY<(?s z(C%@hu5tqO(bz#3RyWqy!{C}+)|4ni%X}NCC!Y=DP1&5n_#9-3MvIXWN4ksS@Mo%Q z@@ffC=enm*SzYCM6oG7S_QixTtJiOEVU85zJ#-bml-Y z^o&cZ>RJ;~sQbDdNTaBhV7bpUI+Bawi%d-|i5jD)2uvL(=&56Taa&uwDbd{0*Z?!( zJ88~(7x`;TtGTWM1R8>P_f4vL#T1*EkmwH?DRQyFI?(0x z)#z%DT=zc!wS(^dp}!e`+F&m+fWdBHY6p?qLH7WYnp#_yh>x;i`!AMrq9XX@8~7sK z$_f%-?7JBML4&MJ7QqMU1}%nlp1fu9I_`I`oZ@2oJRQxutnG55{Cb{0H5;@)i(6`! zCo(9nwoP`S_B>)i^_4#?fxgm<@neF05tOL!3HC)aNq<-ns9L_*1~XJ2GcsI z+r~5(R;psUC+D(iRHq?bY+vAH0~R|-7!}hypr_$dZjpQ{hF_5(!~5zjQ}j4Ycrxd4 zoM1zTG*Qg)nA<}h|HKXN$K!n&Zg0ee6ZnTW`)X(t3U%uVvsgR0!Xo@=XQCPQQ=8#4 zp3#NUe}yHmGKTF{iNfDRRef2tXcSATn&Zjls@h~rSv77gR^y(5zkF89Y>-o#tZOL? 
z^F+mJUCM7%qG6^C6}4h!HR^{6O?n0tff&_Gq{pcyaCXm6l=&x@SGHUPA1O^P6SpFB zPLEi!C}#_D3A_+DWFfz}OcV}ptZRu@Tn1cvdja-E{iGUAB&W-%4ZLzUr}k4>g4G<^ zC72Wsy>!NwV0w7zdk!sqn$cw~?vStrt=zefMrm1F;4f=&3a*yDYJVS+GrrPAkxpId z`W$vT)b>w!*_*o4*>tWlSq$5n@<~x~qM@eF)-l;S8ESzvDyt^fSV;{HyE5p>#M2tF z-i6)>Mi_)e^TP|(U0|=4Tmth3`Cw?0{^+rFRe=s&iqnH|yQMXRP>_M8?{vKb>2LcF z_9=%JIrt|~GqkLXnO8Uoz8$?dk&Mr2@;Cmehc1cFNI(zXxNIPgC!mLpE`{X^%rKx1 z;0#X|QpPM-&cNmSpqCXTFp-bzrA1{D_t>CrHr^bsOEkdaOZh2pe|*h=#_Fo3s%qG7 zB4-y(h{HqEn-X#PU}#aip)t``ooIsHy+J%Af`(>1X@X%h8pTaaHMt(N@4#2HRNYC4 zN66KUP0MiRVyLdbIsx77q**#wrBt9?pVi)AaZ%5k+-4-d83shR>c<^((jDDyWZscuiy&(72w59+?H-|9zYbdr9+ zk5e76Te@z>oa#F8SEc{_e-V5uzNsMI(o|iibBT>@Q4>y-p&N%;;hgx4rYrsD27@ke z(Px<7n=jKwE(LA6cFCn%UDDdI>#9*0m@FXg7P6&`vhgTqO1 zDqx>2U_$}7qyB+xf?M?(^8z#_lQRN6LZ;#vTODM+fx^L8A%t(>D)@lFpGgUhfqThi z1U@cPleHt}rT*ut6H|i7ibrldU~qkVRK!n|)w9U5M{@)g@r74#qv~UqaP0 zAIK1STk$k{{50@BXg#j~tcvVBJ|HMumyg^zOUzk{2 z+W;>bB&(Nr@LU0q=7d;k(H(3_s8s&uX$Ia+D=Y}C4lWX_gN)XW;M_bgbq77>X4}L# zz8tG(`tc=+I(Q8M-g+pT(S&n&{qn<7wFiW=d3fmsr}Hz^@+o6gJGkb!B6DkwjI7YX zwX@>YOA_$PF)YcW^>Qr^4?&B~>Ks}?R}E9yX);*D$A zWr67jZggQG>-EzSTCC~P<%_k<^~JjX67S3oYH(Luav6Q0RL>X&Hs9dW0U8P8RjqAh zVvz)fssbIzTfJg@Uept}-0DtSe9GlHe^cpu@c|zeCbJQN8Yo)OsPb z$up3#Z-N^8GFr+ddMgNe3;9ZIVM%!C%iO@nM?L5zYU;#`yzJ)_85qQPFAZxdF$;l_ zVkvYPxbGs~x;?^Un(jQ(8Pl#vbxfo8TI%K7F0k1(A@hx)02+g8q@vfg=LDJpQCXTo za8QBsRNKZEj^#99TAX%LGwOn1tHuU~9kkJ^Gpf;_b)Y8fKG3-tRY6ry zJaDu>5E#H%0qYB+n-z=5cs)KQk-vG$%tA)Ks~?}`%Y~OL3X9a!81WhuKFa{NfkK#t zie;3Td{#{mQ-J{t|5)s}(+wRIyyvH0fGUI^d1W|vqOpJq;W2!>T4n>*hVY9(<;VeK zfV?)_7h?08QU{Hdc`cJp!>tAL@zVfmTQNTUrDxj7Krsx!H{}cAbpzk$dbJPRb=nKU z2Us|;P@mWgzsGE>ho4PWCt6zcbHI4AYH?I79AVjBKcL4;(Aq0z!qOaeeX$1DC*TJu zOB1qp)vwSLmqAsKZ8%=j(imR?*3?;vR)18~zM6-XtIG$8So&Pj}$`rzHK* z7Jhy!0{(P%7W@kLko>yr;TYlg`{@Px*SSDLIMUBy^j9(ZDn@@DqhHAAYZ?AohPN@i zn&BNx{xNXfMECbF`Wi<61jF%cVwJBhhI75IGd#iQdl`h#^=B-j=k=$M(ewIK%5d(7s~OJiS;*w``n;IYb9?F- zJ+~*x=(#6)0-)&kG8}&`ukgPTZuy=y`sB#c=Mo-x_lMmHhU5M$hHE%jmh^_Az?y 
zH+9rEmLr~Cq3X%-AyScvk6}2kw?z!cQ#O>GD8o7ZwG8L|PYuJloTUusa_(R_m-8)# zbNa^}_)84ue)z2e{{zFhoyQNAe#3e~Bmy{zaP;R-n14nx`ey-G;S7dzIr$9V!RXIt z@}Fb!cauOQeV)<7F;F6)Jtr}Gj^imjsvPqf{RoD`@g^dmoKqM+o#9;0T!x>_=;4?t z5l{}E>Z0_*F?1UL48un={CI|6;=r$G_;HMWB*V{OI2>mr0@}mzOBs%*_9#2mc`r!M z?NR5PAfE4_$J2I{99MqNWqR`%Jsb-o0?NnJd6b-ThI74j4t$*h-|E13FnkQt`zFK3 zGW>mp<0&@E&ch7n?G)=62Fw?p=A-D(VmNPiyd1e4UXEN2r$3j;FA-X$-U$pphv9sj z<@JH%HH@D7KV3QV9dfQe}}&*eYFaGV3HeDQfE%2YTnM?76f;k;b9-ezH?wDVF%zntMbUz-@t>9;VP z)AMx%j{9#B-D`}V+q0kHTt08_T>dIX&*gAGaC$h-O9b?Pl;!s;4CiuqKQ@EWt8=K# z^5y+4pLbous{oaSL2SNSN2bH(5o+9DSFl3uuL(S_TX9%gQ8dR z=R4@tT1?R|k?dZrgZ?&#SHQLE-|lAk7a7jSO^!do==pq!_iw!aDJ8owzr6pcWOyah z%f}J)iL!GQ(X0F_|Ey(lczgKFzp}5@KQ!Uli}PRwXcQAUtsjN zI_NhtoR9C%Fr3SOop3Do(6dD`MR3n`Ng?K`mi9 z#}IDT!xI=im!tNnPz|@|Jck^$Plfbc&LvDfmowcVXC|ZPauzb2%USG@)5z$#oUbvQ z%js~)xrfnnIgc=$%X!=(=NU%N<@}Q2T+W*gIsd`vxttFe&gFdQkaL*Pb2-Nk!P+D1 z&o!(bp3iXJ4`0l1-rvr2;8!!8_rrX?%G*(~G)J^|K2PTK*D^af&gG0^acUV{w0Rz#Mg3~y!lISl9J zeyIaj^GdXTDU;*EdAZ-n=*@cdF6ra;4`=jT&dm-ym*HnK`?20C`MmzP>g_Tnhp&s5 zGo05?o-giCj(?unvx3Rt>r+l&$LL*la(dIBdx)R=^EjrL`|}nD-o|is4mOtiN`}v1 zcCKRh^$fq2;au-(2fl{kyu5flFJtm=XY}0vYZ=b2hRN?>xLWt1AJ#FP z_lLawUdiNiGWr`BzMkPphV%R`WB8qn{#J%>U^rh#^YMcBdv`H(X-0n^lk@Kk=k$EN z%=_C>z>5LxAjcXocFuJ&>eTEOt5=^CR|bdCLF4qaoknXVE47rMr% zi>?tLM%Nhi&^6-6(Y1)sa0rO2Z$sdH9>a$tK%tc3YVR477BKt-MGWyOhM&mr%?v+@ z;av<@WrxW<3_qFC3&Y_a?N?(lCiC~5)E+kCGmsDhMruxr_caVxYbeBTXZYy|P}st7 zHAljvHwnl2!1-h|;=PPs^$CdoH=|c=4e{Iv{*`u~DKEv}0*0%$g9#N3&l8mLPZPt> zX7~n%=QDg8!_Q&(TMQq~@B<7#m*FFZ$Q#Vwc?>URxaxB-c@D$X+!FC5!_^uV@p~D5 z0Rj}BVz_F{nDiFIRo{j90fwu+T*OCEqec5wUxauu!__lE#OE+vjWviT8Ls+d#8(oI z^+b))h;L%_MF>#%9-}W-AjH35^lI*n$@>^SiO~{cx!QA%2U|mooeS!>2O*BSTIFrH!P{6m?Ex^ivo< zo#FEtK7-*a7#?N#CWc?e@Er`loZ)*I9%J|+hO0FlCgb;CF`)fdh%5PL3d7adhbi+J zuEs3HS1^1Q0u(kee6|81-obFScEjX73|Brz{1C&xhyaCAxaj}^?N@6Yyr05w^-Kiu z`3#?f0EHC{S9?&Hw1#k1e-zJVMt`*u0&y3^uVHu(!_}M?lmBeUS7}2~Aq33Vwc<+t z8O`wP7(RpH*E4(;;TU24pHFFZj9y`ww2t975G_Vq8Ga+fpC%k5l;1{aZ!!8fqd&m# 
z1q>fSlMKvn6~l`ezL4Q_7+%frB*SYMelNol41bE@ix~bE!xuCB0K=Csd<0E~(EeJ6 zmoWTG44=>Nn;5==;dKn(#PE8C?_hWX!}l<}k>Q6J-o)@xG-*Tozs&F{3~y%me1^9$ zd=NVff7qKg4iV*D!e$O)k-XwTFZF6oxNHfWmx+e?@^1uVVNL zhHqy0N``kaT&+DZxrgDmFnW(ByJ+VshUYQ-R)&`{9C_6DSr-_%S{HcZTb9))s}@3> zY?kp7b%m$yj)xzPB)#!9Rmm!E{KA$NPXOcLH)*MyU&D{8;h*5qLhuX%^GjQyQ*gKx zePX5-k%|Oykca=#ARVq=dRY5ag$`&-bto2>I!@ zj|EA^q!7L|WdClELFM>_kgthZqE0x54+#aNKsozAJ#v3jD8K_h@^XD%C_rcZ(vY$a zkD;Nu5%|sw97cB}z8MrWOsgK)cYNU2U*Qw;`ulv4+k!!78GoHAe*zfz$PxZ@7SAK} zkFrhalRmnHw3qrmu}vSR!#|zl$Zaa8jP-qFPvC^Q)W4$Ru;+iy2X%r5`X_tKkFcjVj{=cB`K*x|R#_=U#ti>s^SbfPJA|L|c;=-T_^(F22%Se)dMf+3cV$>`eUxBnEY zm(fd8XXy`Aj&u7|^~BTQY^@}NZFyiHO36TPVl7eaeAUmU=;x?E3Gk~S{|1R)5ULF9 zbnIIt!Mb86_di?W*AaWh4!>vDx>G zzRV%M6oSJ|nx7QuBM*J|_krdKH`dRhd@>*D2X%9N8oDO;-7(48z5XIZ&?Q>N5uaJOJ`UlqXAm=s41Xs=_kh}0 zsrXxeEy9t%f%+Rt(>Mnj259?H(EeG(e2Hi|9ok`H+C_l28HdKI*@N}mu0B7V==_F+ zE{{}aKwy3%FFxSwdQ!4JV%^8`;&|8iJ$oWy*f+Hio zbJ=5RJJP=Ee`N3fY<)-FaV)Q+(Iay#zQc-CDo3ay2d57iJEq=YVtS1%VB06z+5(5y z5B^(FrZbMvv9{JpM89TYnDl(|AAt&3fHQ?bPivrK9N_n$Oj|NnEv=u13aL9nN2%*CBMtKNO{Tqr zFRIkOw+I-L*`stVpRNdM%VcGQl44(&JJMgV z2<8KKq6R$zK?XwKiU_FrR6ZLKuppzohkurH;0qFl!U6TAavrK?iWxl~+A;|}QXf+= zJdweU`$&9OA!uzzeQtabA)pDmkWcRG2m#HR?JI+4soTDgpawkKrv`$CWwcv8B3~U) zdC9B~gU$ZP9rtf~8#h%x<)i(Nj6?k{S|7I`$or8!B;Qa4 zPvR8)fKt%;$sMR~YCb||>+9wLGqp^fu&?DB1(lI=_5;4bs_>~Fxi2lkukgjInwzVZ zdE@bg$wULZ0^QJ?3KvLDOey76ubJO5oV6h*6 zN1UquB^Le@)8JP{%k4iY4Sx081I|Ap4gOUY`%g}Tf31c8lr;D^Solv(gMXuie`Ff` zn=SmOrNRHOg})uJRO`<+3qPKXn=1cy3;&sE@ON4G^U~noW#Py3r&6_lw}l_iw@#J6 z+rmFO4gMYrKc4TLs{UT$r=K&4{__sNQsqBj(ch5<{~-&1VH)-i0a^?>G7|O|rNKYU z!e5*QKlW7^xc-T0@T+rLIRAt+__3~-`ahor|2PZ(lr;F!SEhbEXED|KgSt%q(lq#I zSorZA$yD{1TllA?!9T~skLO;ds(-$PAM=zde~pD7&-F}|zs|ysXTzq-pS19orNO_# z!jE&fRQ2C(;m0yfm46-a^ZsvE8vGkA{8y#HzlHdD`=6Hv|5gkCwQ2A_W#PXr4gMV# zemr9;RsZj_@MD`umH!tO{u|QZf6KyuV;cN>Ed1-BLrc~EcP;##Y4Go}@L!OI{Rb@k zcs^08_8+qFm!!cz1b>VRf!F_~Y48uT@J~vEe}sj9avJ=jEc|#bR;vCRZQ;l7BBaW% z&O_$@!}BIne`6Z_TP^%}o?fc@pR(|OISu|D7XIcm_;*_PaZZ`4{lBpA<9WfU^1o%_ 
zzdMckx5vVNPa6F1TKMrC*i`M`XW_pi4gLcbemrkBRsDx7{P(7je|3&MAHQ%;o~r&~ zbgmia$MYdm%K5VM~ud(oNPJ_SB!jI?2rm8<_;m0*;s{AV~ z{CEy)s{FTG`0;GyRQcCg`0@PKRQd0<@Z*{~RsKyDemt)=RsJm&emvJTRsO9O{wLDl zf6BuD!!-DJSorbW*i`M`Y2kk=4gOzP`0-rXRQ11Q;r~$@{Ch0?cusAq`roziZ@2L0 zL7M4xaqkV!6;tEcNTNRvVw6eh z2!%5@>D)aH(1odg9N;L^#c$G^_l3ZV*ikg!fJ+gS{!t2`*dX)pOf(z+6=c8pY}w@h zv4dZogV>+`zd88ZQq*4z#-shFT^ophKcw-X%1>dR17=(KZ+=8lsB*^g$8*^fzrv@G ze$)S}=p4cml_HAKRvxZ!@ZU}RBRDZ!oBX(@L(J6wP>TFGC${nbg!t*0Ic){f^6;pG ze;4t?>j{Bi@;?lK&HvjUk!;G(=>OFKnf`y%q5p3w>i?xf|89%^H5UB`9QyO=9LN63 z?=+~mAR<(L6N#Ung^3<9>-h)(Z2s#d`xhV+1oYoph)w^^a_Cp*S&k9h{vWiJhnPcu z><5yHm){)#nfhBD`gc(N^SCm&2J%2?a_C=R(U1GjrvAGf`uC9j{`$Y;q2sod-~S{2 zF9ocXv>d|klG)0yjqHyRF_s_hKbiL9IoLM+*U*C-e%3FFGRqIY`(@MLWzmoOPp1B2 zV7BR3=ZW^$|9{oNzn$!#fkv}n+P@6=;h$H^Xz(&;s{{%NNB`ma+q56Q({8h0ontzV z_*G=;KL+@1`b$VZum8AyGxb02&_9OGA?0TdqYY;Lf5f4`g7nA8aLoTki~eqh{^g|q z0wcqw{x==^H;FR*{S(0;Br)@!LlejT#-AG<{HGH?u5WlS?eBE( zPbB{Ae)7K-_-+1Q^|;j9P0BInt1pZNz_%=9Cwv|HlHq&42T@NqXM@pi52v-RRJN3QauM_oM$>hyJys zKSummeot8R-|5i5h4kOqkN!Ix`gfE5Hm3hci~fIi=r1~1GG1rs$Fkz#XAb=_%q#@% z|EJ;J^#A%}?d9J|`Y#qz{6D7tcHp;_e;esn?F9Y*j79%dFwaDpX8*C%k$sU# z{tcG=<67U$|JNP*&pbuuxxezi&7r@G^z-t6)}sGs4*idk{+A7h8@FDULx0H+rTweW zIS|nQ&p~YF|8JzA-KZe=XS`BYvzuuUhQi>(GDnnUb+T|GnqXzuV%!pIh|bJKXNSV$$E= z{PzvwH|u2u@o$#d(0|caJoGyB?@v+xAA#TI|6WV}UkAlz{k=5Tp8sRdl2%UV#=^Ck z&&j}V(=R$Zpy%cP2Hcza?|0~*P5SYjFCI+&cRBROo|9aB{`@9DrvAS;^smd4{7}6D z!7#^LaDv@`59CRkzC;jIc`8VtDXJCLtpP2r8hxqgQ;cs#9e@y(B8T_b=hX&xc`M-zq&)aVgFq-x6 zBZvMO`BID7f13IaI`ofuLGttR|AR&UcVV4?GEM*8NctQTcO3fPCjI98CtG_$Ui{9Xe>3Ul{ZFq&|0I|v z+wy<*Ia2;DhCb8(#lUaN|8CO1fcUZhdmpaN`uBoE|7OzPU;Y1yLw`Y+RLskNpGE(| zQ|XY z*QWm)9r_oNel!1O{ri$b|Nbti7?MRm{U5-UssBG5`d6f=|2Gc(IX{(px&8wd{gY0! 
z`~QB@f29z3)6q60Kd)tjTZlXY_b1AhyHb>_#0ZYyfFQ@(V>4E>F*+bY=4+hv;4-6vgdy^ zIj)%aRb=WP4g9wJH@zr1c>BY>WK;h}hyHn_zeFjf7_-5{Jr4bANk4CYLoE7Zuuevq z=KNy=>3`o)Xy$)9@Z0jgo%Ht*Kl=YD_;2RF%b|by7@5m2@XW%sssCAr{*5~&Blkb9 zrA+2D(a-NcXm-{}_pYaIGtxj-_`HuR&dJgjo)kG(9ZVnmGkM_KeA4fA}IY3Bdr&q~H~ ziC;yg{%qj4<$nX|=k@PQi~cr;{>h}jzy7zyq5olv{ydBR9S;3-NIyKD3IwzKPaSRd z-;Km?_Fty|ZgcS0Ir4AvuLORZ|GO;tA8pBh0TeLGG|T^Mq<>l<_u=~gS>U(n@3z#x z^DO!waOmGf`Y$x}oB6-bp+D~x$;j*97>oW9=h^fBchcYA{=;zKx8=Wt^z;4)-zCbS z{3yHDIrM*lHeSdK?FBRck2?655I?vl5KR7u9riD=`0qlC{qxSZ`)@tz@2~wl>EPc; z{IIW3#`P z_%93OUea<1=SepEHi;i?{w9lld{^JpzX&=QfJ6R&i}d$5|EvOjoB!8Z^y9NIQ$P0eHvLD9m;Qst zJAq)fpFC(MHvUtI-)w(K%foFB{&R^x!HMD8y4`%)+Ir6_-<$svu#`YJ3|B7F=ORQg}?~$YTEy;H!$mhYdAMZ`uk-p#( h$<+(}BZhJyJkFy37)Z0}AGt)Zd2ddZvHEN3|NlPE+>HPL literal 0 HcmV?d00001 diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/flags.make b/thirdparty/bmt/build/src/CMakeFiles/example.dir/flags.make new file mode 100644 index 0000000..efb7961 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/flags.make @@ -0,0 +1,10 @@ +# CMAKE generated file: DO NOT EDIT! 
+# Generated by "Unix Makefiles" Generator, CMake + +# compile CXX with /usr/bin/c++ +CXX_FLAGS = -std=c++11 + +CXX_DEFINES = + +CXX_INCLUDES = -I/home/bemdeppi/ham/thirdparty/bmt/include + diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/link.txt b/thirdparty/bmt/build/src/CMakeFiles/example.dir/link.txt new file mode 100644 index 0000000..868b0e9 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/link.txt @@ -0,0 +1 @@ +/usr/bin/c++ CMakeFiles/example.dir/example.cpp.o -o ../example -rdynamic diff --git a/thirdparty/bmt/build/src/CMakeFiles/example.dir/progress.make b/thirdparty/bmt/build/src/CMakeFiles/example.dir/progress.make new file mode 100644 index 0000000..abadeb0 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/example.dir/progress.make @@ -0,0 +1,3 @@ +CMAKE_PROGRESS_1 = 1 +CMAKE_PROGRESS_2 = 2 + diff --git a/thirdparty/bmt/build/src/CMakeFiles/progress.marks b/thirdparty/bmt/build/src/CMakeFiles/progress.marks new file mode 100644 index 0000000..0cfbf08 --- /dev/null +++ b/thirdparty/bmt/build/src/CMakeFiles/progress.marks @@ -0,0 +1 @@ +2 diff --git a/thirdparty/bmt/build/src/Makefile b/thirdparty/bmt/build/src/Makefile new file mode 100644 index 0000000..8963c02 --- /dev/null +++ b/thirdparty/bmt/build/src/Makefile @@ -0,0 +1,180 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. 
+$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /home/bemdeppi/ham/thirdparty/bmt + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /home/bemdeppi/ham/thirdparty/bmt/build + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." + /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 
+ /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# The main all target +all: cmake_check_build_system + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles /home/bemdeppi/ham/thirdparty/bmt/build/src/CMakeFiles/progress.marks + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f CMakeFiles/Makefile2 src/all + $(CMAKE_COMMAND) -E cmake_progress_start /home/bemdeppi/ham/thirdparty/bmt/build/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f CMakeFiles/Makefile2 src/clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f CMakeFiles/Makefile2 src/preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f CMakeFiles/Makefile2 src/preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +# Convenience name for target. +src/CMakeFiles/example.dir/rule: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f CMakeFiles/Makefile2 src/CMakeFiles/example.dir/rule +.PHONY : src/CMakeFiles/example.dir/rule + +# Convenience name for target. +example: src/CMakeFiles/example.dir/rule + +.PHONY : example + +# fast build rule for target. 
+example/fast: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/build +.PHONY : example/fast + +example.o: example.cpp.o + +.PHONY : example.o + +# target to build an object file +example.cpp.o: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/example.cpp.o +.PHONY : example.cpp.o + +example.i: example.cpp.i + +.PHONY : example.i + +# target to preprocess a source file +example.cpp.i: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/example.cpp.i +.PHONY : example.cpp.i + +example.s: example.cpp.s + +.PHONY : example.s + +# target to generate assembly for a file +example.cpp.s: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(MAKE) -f src/CMakeFiles/example.dir/build.make src/CMakeFiles/example.dir/example.cpp.s +.PHONY : example.cpp.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... edit_cache" + @echo "... rebuild_cache" + @echo "... example" + @echo "... example.o" + @echo "... example.i" + @echo "... example.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. 
+cmake_check_build_system: + cd /home/bemdeppi/ham/thirdparty/bmt/build && $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/thirdparty/bmt/build/src/cmake_install.cmake b/thirdparty/bmt/build/src/cmake_install.cmake new file mode 100644 index 0000000..8c26235 --- /dev/null +++ b/thirdparty/bmt/build/src/cmake_install.cmake @@ -0,0 +1,34 @@ +# Install script for directory: /home/bemdeppi/ham/thirdparty/bmt/src + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/local") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "0") +endif() + diff --git a/thirdparty/bmt/include/noma/bmt/bmt.hpp b/thirdparty/bmt/include/noma/bmt/bmt.hpp new file mode 100644 index 0000000..d41751d --- /dev/null +++ b/thirdparty/bmt/include/noma/bmt/bmt.hpp @@ -0,0 +1,257 @@ +// Copyright (c) 2013-2017 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef noma_bmt_bmt_hpp +#define noma_bmt_bmt_hpp + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace noma { +namespace bmt { + +using rep = double; +using period = std::nano; +using duration = std::chrono::duration; + +// make sure we have a steady clock, if possible one with a high resolution +using clock = std::conditional::type; +using time_point = clock::time_point; + +// convenience duration types +using nanoseconds = std::chrono::duration; +using microseconds = std::chrono::duration; +using milliseconds = std::chrono::duration; +using seconds = std::chrono::duration>; +using minutes = std::chrono::duration>; +using hours = std::chrono::duration>; + + +// NOTE: the code below assumes floating point arithmetic on rep (the type retuned by duration::count()) +static_assert(std::chrono::treat_as_floating_point::value, "rep is required to be a floating point type"); + +class timer +{ +public: + timer() : start(clock::now()) {} + + duration elapsed() const + { + // NOTE: conversion from clock's duration type to ours (see above) + return std::chrono::duration_cast(clock::now() - start); + } + +private: + time_point start; +}; + +class statistics +{ +public: + statistics() = default; + + /** + * Ctor with name and pre-allocation of internal vector of timings measured. + * If the name is used a first column is added for table output. + * If the number of measurements is known, it should be used to + * avoid re-allocating memory while benchmarking. + * Optionally, a number of ignored warm-up values can be specified. + */ + statistics(const std::string& name, size_t expected_count, size_t warmup_count = 0) : warmup_count_(warmup_count), name_(name) + { + times_.reserve(expected_count); + } + + /** + * Same as above with a name, that adds a leading column to the table output. 
+ */ + statistics(size_t count, size_t warmup_count = 0) : statistics("", count, warmup_count) { } + + // add a timer + void add(const timer& t) { add(t.elapsed()); } + + // add a duration + void add(const duration& value) + { + // ignore warmup values + if (warmup_count_ > 0) // NOTE: decrement + { + --warmup_count_; + return; + } + + times_.push_back(value); + ++count_; + duration delta = value - average_; + average_ = average_ + duration(delta.count() / count_); + variance_ = variance_ + duration(delta.count() * delta.count()); + + if (count_ == 1) + { + min_ = value; + max_ = value; + } + else + { + min_ = std::min(min_, value); //;value < min_ ? value : min_; + max_ = std::max(max_, value); //value > max_ ? value : max_; + } + } + + size_t count() const { return count_; } + + duration average() const { return average_; } + + duration median() const + { + // NOTE: when comparing this with the mathematical definition, keep in mind our indices start with 0 + const size_t n = times_.size(); + if (n == 0) + { + return duration(0.0); + } + else if ((n % 2) == 0) // even number of vaules + { + // average the two median elements, round the result, and convert it back to a duration + return duration(0.5 * (times_[(n / 2) - 1].count() + times_[n / 2].count())); + } + else // uneven number of values + { + return times_[n / 2]; + } + } + + duration min() const { return min_; } + + duration max() const { return max_; } + + const std::string& name() const { return name_; } + + duration variance() const + { + return duration((count_ <= 1) ? 
0.0 : variance_.count() / rep(count_ - 1)); + } + + // standard error + duration std_error() const + { + return duration(std::sqrt(variance_.count()) / count_); + } + + // relative error (to repeat measurements until small enough) + duration relative_std_error() const + { + return duration(std_error().count() / average().count()); + } + + // delta value for the 95% confidence interval + // (not student's t-test but normal distribution) + // [average - error, average + error] + duration conf95_error() const + { + return 1.96 * std_error(); + } + + // relative error (to repeat measurements until small enough) + duration relative_conf95_error() const + { + return duration(conf95_error().count() / average().count()); + } + + + // returns the header for the string() method + static std::string header_string(bool name_column) + { + std::stringstream ss; + + // add name column if name was set + if (name_column) + ss << "name" << "\t"; + + ss << "average" << "\t" + << "median" << "\t" + << "min" << "\t" + << "max" << "\t" + << "variance" << "\t" + << "std_error" << "\t" + << "relative_std_error" << "\t" + << "conf95_error" << "\t" + << "relative_conf95_error" << "\t" + << "count"; + return ss.str(); + } + + std::string header_string() const + { + return header_string(!name_.empty()); + } + + // returns all data in one line separated by tabs + std::string string() const + { + std::stringstream ss; + + // add name column if name was set + if (!name_.empty()) + ss << name_ << "\t"; + + ss << std::scientific // << std::fixed + << average().count() << "\t" + << median().count() << "\t" + << min().count() << "\t" + << max().count() << "\t" + << variance().count() << "\t" + << std_error().count() << "\t" + << relative_std_error().count() << "\t" + << conf95_error().count() << "\t" + << relative_conf95_error().count() << "\t" + << count(); + return ss.str(); + } + + // writes the raw data to a file (one duration per line) + void to_file(std::string filename) const + { + 
std::ofstream file(filename.c_str()); + file << std::scientific; + + for (size_t i = 0; i < times_.size(); ++i) + { + file << times_[i].count() << std::endl; + } + + file.close(); + } + + duration sum() const + { + duration result(0.0); + for (size_t i = 0; i < times_.size(); ++i) + result += times_[i]; + return result; + } + +private: + size_t count_ { 0 }; // event counter + size_t warmup_count_ { 0 }; // number of values to drop before taking data + duration average_ { 0.0 }; // average + duration variance_ { 0.0 }; // variance + duration min_ { 0.0 }; // global maximum + duration max_ { 0.0 }; // global minimum + std::vector times_; // all measured values + std::string name_; +}; + +} // namespace bmt +} // namespace noma + +#endif // noma_bmt_bmt.hpp diff --git a/thirdparty/bmt/src/CMakeLists.txt b/thirdparty/bmt/src/CMakeLists.txt new file mode 100644 index 0000000..bd679a6 --- /dev/null +++ b/thirdparty/bmt/src/CMakeLists.txt @@ -0,0 +1,13 @@ +# Copyright (c) 2017 Matthias Noack +# +# See accompanying file LICENSE and README for further information. + +# do not put executable into subdir +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +if (NOMA_BMT_BUILD_EXAMPLES) + # simpel example application measuring overhead + add_executable(example example.cpp) + target_link_libraries(example noma_bmt) +endif () + diff --git a/thirdparty/bmt/src/example.cpp b/thirdparty/bmt/src/example.cpp new file mode 100644 index 0000000..7f2953f --- /dev/null +++ b/thirdparty/bmt/src/example.cpp @@ -0,0 +1,58 @@ +// Copyright (c) 2013-2017 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include + +#include +#include + +namespace bmt = ::noma::bmt; + +int main(int args, char* argv[]) +{ + size_t iterations = 100; // iterations to be measured + size_t warmup_iterations = 5; // iterations to be skipped before starting measuring + + // generate a table header with name column + std::cout << bmt::statistics::header_string(true) << std::endl; + + // benchmark the cost of timing + bmt::statistics timing_overhead_stats {"timing_overhead", iterations, warmup_iterations}; + + // do all iterations, inlcuding warmup_iterations which will be ignored by stats + for (size_t i = 0; i < (iterations + warmup_iterations); ++i) + { + bmt::timer timer; // creata a timer, starts measuring on construction + // nothing to do + timing_overhead_stats.add(timer); // add timer to statistics object (measuring is stopped) + } + + + // benchmark something that takes time + bmt::statistics sleep_for_stats {"sleep_for", iterations, warmup_iterations}; + + // do all iterations, inlcuding warmup_iterations which will be ignored by stats + for (size_t i = 0; i < (iterations + warmup_iterations); ++i) + { + bmt::timer timer; // creata a timer, starts measuring on construction + std::this_thread::sleep_for(bmt::milliseconds { 25 }), // spend some time + sleep_for_stats.add(timer); // add timer to statistics object (measuring is stopped) + } + + + // output table entries with complete data + std::cout << timing_overhead_stats.string() << std::endl; + std::cout << sleep_for_stats.string() << std::endl; + + // output just the averages in differend units + std::cout << timing_overhead_stats.name() << " average: " + << std::chrono::duration_cast(timing_overhead_stats.average()).count() << " ns" + << std::endl; + std::cout << sleep_for_stats.name() << " average: " + << std::chrono::duration_cast(sleep_for_stats.average()).count() << " ms" + << std::endl; + + return 0; +} From 
d69ef0b1bf52137df2cb059372bdc790019c714d Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 14:47:00 +0100 Subject: [PATCH 071/150] initial commit of tcp backend --- CMakeLists.txt | 2 +- include/ham/net/communicator.hpp | 2 + .../ham/net/communicator_mpi_rma_dynamic.hpp | 10 +- include/ham/net/communicator_tcp.hpp | 504 ++++++++++++++++++ include/ham/offload/offload.hpp | 5 +- src/CMakeLists.txt | 22 + src/ham/CMakeLists.txt | 18 +- src/ham/net/communicator_tcp.cpp | 9 + 8 files changed, 562 insertions(+), 10 deletions(-) create mode 100644 include/ham/net/communicator_tcp.hpp create mode 100644 src/ham/net/communicator_tcp.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cf05180..5c48af8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ message(STATUS "CMAKE_BINARY_DIR: " ${CMAKE_BINARY_DIR}) ### thirdparty dependencies # Boost -find_package(Boost 1.40 COMPONENTS program_options REQUIRED) +find_package(Boost 1.40 COMPONENTS program_options system REQUIRED) add_library(boost_library INTERFACE) target_include_directories (boost_library INTERFACE ${Boost_INCLUDE_DIRS}) target_link_libraries (boost_library INTERFACE ${Boost_LIBRARIES}) diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index a0a6164..390279a 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -58,6 +58,8 @@ namespace net { #include "ham/net/communicator_scif.hpp" #elif defined HAM_COMM_MPI_RMA_DYNAMIC #include "ham/net/communicator_mpi_rma_dynamic.hpp" +#elif defined HAM_COMM_TCP +#include "ham/net/communicator_tcp.hpp" #else static_assert(false, "Please define either HAM_COMM_MPI, HAM_COMM_MPI_RMA_DYNAMIC or HAM_COMM_SCIF."); #endif diff --git a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 4afd7f5..4c4bb65 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -163,7 
+163,7 @@ class communicator { communicator(int argc, char* argv[]) { - HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising MPI" << std::endl; ) instance_ = this; int p; @@ -172,7 +172,7 @@ class communicator { { std::cerr << "Could not initialise MPI with MPI_THREAD_MULTIPLE, MPI_Init_thread() returned " << p << std::endl; } - HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI ..." << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising MPI ..." << std::endl; ) int t; MPI_Comm_rank(MPI_COMM_WORLD, &t); @@ -181,7 +181,7 @@ class communicator { nodes_ = t; host_node_ = 0; // TODO(improvement): make configureable, like for SCIF - HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI done" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising MPI done" << std::endl; ) peers = new mpi_peer[nodes_]; @@ -269,7 +269,7 @@ class communicator { } // debug msg - HAM_DEBUG( std::cout << "Rank: " << this_node_ << " in loop run " << i << " created REAL windows..." << std::endl; ) + HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " created REAL windows..." << std::endl; ) } else { // create remote windows without memory (join the collective call and retreive the window handle) @@ -277,7 +277,7 @@ class communicator { MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_flag_win)); // MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); // debug msg - HAM_DEBUG( std::cout << "Rank: " << this_node_ << " in loop run " << i << " creating EMPTY windows..." << std::endl; ) + HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " creating EMPTY windows..." 
<< std::endl; ) //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].msg_win_data, &(peers[i].rma_msg_win)); //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); } diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp new file mode 100644 index 0000000..66d4a55 --- /dev/null +++ b/include/ham/net/communicator_tcp.hpp @@ -0,0 +1,504 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef ham_net_communicator_tcp_hpp +#define ham_net_communicator_tcp_hpp + +#include +#include // memcpy +#include // posix_memalign +#include // async thread + +#include +#include + +#include "ham/misc/options.hpp" +#include "ham/misc/constants.hpp" +#include "ham/misc/resource_pool.hpp" +#include "ham/misc/types.hpp" +#include "ham/util/debug.hpp" +#include "ham/util/log.hpp" + +using boost::asio::ip::tcp; + +namespace ham { +namespace net { + +template +class buffer_ptr { +public: + buffer_ptr(); + buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node) { } + + T* get() { return ptr_; } + node_t node() { return node_; } + + // element access + T& operator [] (size_t i); + + // basic pointer arithmetic to address sub-buffers + buffer_ptr operator+(size_t off) + { + return buffer_ptr(ptr_ + off, node_); + } + +private: + T* ptr_; + node_t node_; +}; + +class node_descriptor +{ +public: + const char* name() const { return name_; } +private: + //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable + char name_[64]= "Node descriptions not available for TCP backend"; + + friend class net::communicator; +}; + +class communicator : public std::enable_shared_from_this { +public: + // externally used interface of request must be shared 
across all communicator-implementations + class request { + public: + request() : valid_(false), received_(false), sent_(false) {} // instantiate invalid + + request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) + : target_node(target_node), source_node(source_node), valid_(true), sent_(false), received_(false), send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0) + {} + + // return true if request was finished + bool test() + { + // tcp backend does not feature asynchronous operations yet + // HAM_DEBUG( HAM_LOG << "request::test(), TCP backend does not feature asynchronous operations" << std::endl; ) + + // int flag = 0; + // MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + return received_; + } + + void* get() // blocks + { + // tcp backend does not feature asynchronous operations yet + // HAM_DEBUG( HAM_LOG << "request::get(), TCP backend does not feature asynchronous operations" << std::endl; ) + // HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + // MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard + + // block until async receive handler reports completion + while(!received_); + + return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); + } + + template + void send_result(T* result_msg, size_t size) + { + assert(communicator::this_node() == target_node); // this assert fails if send_result is called from the wrong side + + // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else + // MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + + communicator::instance().send_result(target_node, result_msg, size); + // don't need size * sizeof(T) 
because req.send_result is called as send_result((void*)&a, sizeof(a)) in offload_msg.hpp + } + + bool valid() const + { + return valid_; + } + + bool received() const { + return received_; + } + + bool sent() const { + return sent_; + } + + node_t target_node; + node_t source_node; + bool valid_; + bool received_; // used for the async receive handler to set to true, checked for completion + bool sent_; // used for the async send handler to set to true... unused, but the handler likes to do something + + // only needed by the sender + enum { NUM_REQUESTS = 3 }; + + size_t send_buffer_index; // buffer to use for sending the message + size_t recv_buffer_index; // buffer to use for receiving the result + size_t req_count; + + private: + // not needed since tcp backend does not offer async operations + // MPI_Request mpi_reqs[NUM_REQUESTS]; // for sending the msg, receiving the result, and an associated data transfer + }; // class request + + typedef request& request_reference_type; + typedef const request& request_const_reference_type; + + communicator(int argc, char* argv[]) : node_desc_dummy() + { + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising configuration" << std::endl; ) + + instance_ = this; + + // command line configuration + nodes_ = 0; // number of nodes + this_node_ = 0; // "rank" of this node + this_port_ = 0; // tcp port used for this node + host_node_ = 0; // host node + host_address_ = "empty"; // host IP address or resolvable name + host_port_ = 0; // host port + + + // command line options + boost::program_options::options_description desc("HAM Options"); + desc.add_options() + ("ham-help", "Shows this message") + ("ham-process-count", boost::program_options::value(&nodes_)->required(), "Required: Number of processes the job consists of.") + ("ham-address", boost::program_options::value(&this_node_)->required(), "Required: This processes UNIQUE address, between 0 and ham-process-count-1. 
0 will make the process the host (required EXACTLY once). -1 will assign any free non-host rank.") + ("ham-tcp-port", boost::program_options::value(&this_port_)->default_value(this_port_), "TCP port used if this process is a client. Default will auto select an available port. Host will use ham-tcp-hostport and ignore this.") + ("ham-tcp-hostname", boost::program_options::value(&host_address_)->required(), "Required: IP address or resolvable hostname of the host process. Required. May be used on host to select interface.") + ("ham-tcp-hostport", boost::program_options::value(&host_port_)->required(), "Required: TCP port used by the host.") + ; + + boost::program_options::variables_map vm; + + const char* options_env = std::getenv("HAM_OPTIONS"); + if (options_env) + { + char split_character = ' '; + if (std::getenv("HAM_OPTIONS_NO_SPACES")) // value does not matter + split_character = '_'; + + // parse from environment + boost::program_options::store(boost::program_options::command_line_parser(detail::options::split(std::string(options_env), split_character)).options(desc).allow_unregistered().run(), vm); + } + else + { + // parse from command line + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run(), vm); + } + + boost::program_options::notify(vm); + + if(vm.count("ham-help")) + { + std::cout << desc << std::endl; + exit(0); + } + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): command line config:" << std::endl + << "ham-process-count: " << nodes_ << std::endl + << "ham-address: " << this_node_ << std::endl + << "ham-tcp-port: " << this_port_ << std::endl + << "ham-tcp-hostname: " << host_address_ << std::endl + << "ham-tcp-hostport: " << host_port_ << std:: endl; + ) + + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising configuration done" << std::endl; ) + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): connecting targets to host" << std::endl; ) + + + // 
init peers structure + peers = new tcp_peer[nodes_]; + + // targets init tcp connection to host + if(!is_host()) { + tcp::socket sock(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target + peers[host_node_].tcp_socket = &sock; + tcp::resolver resolver(io_context); + boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve({host_address_, host_port_})); + + // send requested rank to host + HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) + boost::asio::write(peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); + // recv rank from host + boost::asio::read(peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); + HAM_DEBUG( HAM_LOG << "communicator::communicator(): received ham-address " << this_node_ << "from host" << std::endl; ) + } + + // host accepts tcp connection from targets + if(is_host()) { + tcp::resolver resolver(io_context); + tcp::resolver::query query(tcp::v4(), host_address_, host_port_); + tcp::resolver::iterator iter = resolver.resolve(query); + tcp::endpoint endpoint = iter->endpoint(); + tcp::acceptor acc(io_context, endpoint); + + node_t req_ranks[nodes_]; // store requested ranks in order of connection + tcp::socket temp_socks[nodes_]; // store sockets temporarily in connection order + bool taken_ranks[nodes_] {false}; + taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) + + for(int i=1; i < nodes_; i++) { + temp_socks[i] = acc.accept(); // accept connection + + // recv rank + boost::asio::read(temp_socks[i], boost::asio::buffer((void *) &req_ranks[i], sizeof(node_t))); + } + + // rearrange sockets and inform targets of resulting rank + for (int j = 1; j < nodes_; ++j) { + if(req_ranks[j] < -1 || req_ranks[j] > nodes_-1) { // 
check if rank invalid + std::cout << "communicator::communicator(): illegal ham-address requested:" << req_ranks[j] << std::endl; + exit(-1); + }else if(req_ranks[j] == -1) { // skip wildcard ranks, handled later to avoid conflicting ranks with following connects + HAM_DEBUG( HAM_LOG << "communicator::communicator(): connection " << j << " requested wildcard ham-address" << std::endl; ) + continue; + } + if(taken_ranks[req_ranks[j]]) { // check if rank already taken + std::cout << "communicator::communicator(): ham-address requested more than once:" << req_ranks[j] << std::endl; + exit(-1); + } else { + node_t rrank = req_ranks[j]; + HAM_DEBUG( HAM_LOG << "communicator::communicator(): connection " << j << " requested ham-address: " << rrank << std::endl; ) + peers[rrank].tcp_socket = std::move(temp_socks[j]); // = move https://www.boost.org/doc/libs/1_65_0/doc/html/boost_asio/reference/basic_stream_socket/operator_eq_.html + taken_ranks[rrank] = true; // mark the requested rank as taken + HAM_DEBUG( HAM_LOG << "communicator::communicator(): associated ham-address: " << rrank << " with connection " << j << std::endl; ) + // send assigned rank to target + boost::asio::write(peers[rrank].tcp_socket, boost::asio::buffer((void*)&rrank, sizeof(rrank))); + } + } + + // handle wildcard ranks + for (int k = 1; k < nodes_; ++k) { // k is index to connections in connection order + if(req_ranks[k] == -1) { // find wildcard connections + + for (int i = 1; i < nodes_; ++i) { // i is index to ranks in final rank order + if(!taken_ranks[i]) { // find a free rank + HAM_DEBUG( HAM_LOG << "communicator::communicator(): associating wildcard connection: " << k << " with ham-address " << i << std::endl; ) + peers[i].tcp_socket = temp_socks[k]; + taken_ranks[i] = true; + boost::asio::write(peers[i].tcp_socket, boost::asio::buffer((void*)&i, sizeof(i))); + break; // stop if free rank is assigned, go back to k-loop for next wildcard connection + } + } + } + } + } + + HAM_DEBUG( HAM_LOG << 
"communicator::communicator(): connecting hosts done" << std::endl; ) + + // host init message buffers + if (is_host()) { + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for(size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j-1); + } + } + + // host runs io_context in separate thread (asynchronous progress thread) for async operations + boost::asio::io_service::work work(io_context); + std::thread thread([&io_context](){ io_context.run(); }); + } + + + + } + + ~communicator() + { + // finalize + if(is_host()) { + io_context.stop(); + } + HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) + } + + + request allocate_request(node_t remote_node) + { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + + const size_t send_buffer_index = peers[remote_node].buffer_pool.allocate(); + const size_t recv_buffer_index = peers[remote_node].buffer_pool.allocate(); + + return { remote_node, this_node_, send_buffer_index, recv_buffer_index }; + } + + void free_request(request& req) + { + assert(req.valid()); + assert(req.source_node == this_node_); + + tcp_peer& peer = peers[req.target_node]; + + peer.buffer_pool.free(req.send_buffer_index); + peer.buffer_pool.free(req.recv_buffer_index); + req.valid_ = false; + } + +public: + + // called by host only + void send_msg(request_reference_type req, void* msg, size_t size) + { + // copy message from caller into transfer buffer + void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); + memcpy(msg_buffer, msg, size); + + // tcp write + auto self(shared_from_this()); + boost::asio::async_write(peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), + [this, self, &req](boost::system::error_code ec, 
size_t length) { + req.sent_ = true; + } + ); + // MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + } + + // to be used by the offload target's main loop: synchronously receive one message at a time + // NOTE: the local static receive buffer! + void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) + { + static msg_buffer buffer; // NOTE ! + // MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + boost::asio::read(peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); + return static_cast(&buffer); + } + + // send result through communicator + // only to be used by request.send_result() + template + void send_result(node_t target_node, T* message, size_t size) { + + boost::asio::write(peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); + } + + // trigger receiving the result of an active message on the host + void recv_result(request_reference_type req) + { + // tcp receive + auto self(shared_from_this()); + boost::asio::async_read(peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), + [this, self, &req](boost::system::error_code ec, size_t length) { + req.received_ = true; + } + ); + // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + return; + } + + template + void send_data(T* local_source, buffer_ptr remote_dest, size_t size) + { + // tcp send + + boost::asio::write(peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size * sizeof(T))); + // MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); + } + + // to be used by the host + template + void 
send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) + { + auto self(shared_from_this()); + boost::asio::async_write(peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size*sizeof(T)), + [this, self, &req](boost::system::error_code ec, size_t length) { + req.sent_ = true; + } + ); + // MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + } + + + template + void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) + { + // tcp recv + boost::asio::read(peers[remote_source.node()].tpc_socket, boost::asio::buffer((void*)local_dest, size * sizeof(T))); + // MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + + // to be used by the host + template + void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) + { + auto self(shared_from_this()); + boost::asio::async_read(peers[remote_source.node()].tpc_socket, boost::asio::buffer(static_cast(local_dest), size*sizeof(T)), + [this, self, &req](boost::system::error_code ec, size_t length) { + req.received_ = true; + } + ); + // MPI_Irecv(static_cast(local_dest), size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + } + + template + buffer_ptr allocate_buffer(const size_t n, node_t source_node) + { + T* ptr; + //int err = + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_); + } + + template + void free_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + free(static_cast(ptr.get())); + } + + static communicator& instance() { return *instance_; } + static node_t this_node() { return instance().this_node_; } + static size_t num_nodes() { 
return instance().nodes_; } + bool is_host() { return this_node_ == 0; } // TODO(improvement): ham_address == ham_host_address ; } + bool is_host(node_t node) { return node == 0; } // TODO(improvement): node == ham_host_address; } + + static const node_descriptor& get_node_description(node_t node) + { + return instance().node_desc_dummy; + } + +private: + static communicator* instance_; + size_t nodes_; + node_t this_node_; + int this_port_; + node_t host_node_; + std::string host_address_; + int host_port_; + node_descriptor node_desc_dummy; + boost::asio::io_service io_context; + + struct tcp_peer { + buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender + + // needed by sender to manage which buffers are in use and which are free + // just manages indices, that can be used by + detail::resource_pool buffer_pool; + + // tcp socket + tcp::socket* tcp_socket; + }; + + tcp_peer* peers; +}; + +template +buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } + +template +T& buffer_ptr::operator[](size_t i) +{ + assert(node_ == communicator::this_node()); + return ptr_[i]; +} + +} // namespace net +} // namespace ham + +#endif // ham_net_communicator_tcp_hpp diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 0148e7f..58e7e19 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -223,7 +223,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) // TODO(improvement): create a data transfer thread for one-sided comm.send_data(local_source, remote_dest, n); // sync return future(true); // return dummy future -#elif defined HAM_COMM_MPI +#elif defined(HAM_COMM_MPI) || defined(HAM_COMM_TCP) // allocate a request and construct a future future result(comm.allocate_request(remote_dest.node())); // generate an offload message @@ -261,7 +261,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) // TODO(improvement): create a data transfer 
thread for one-sided comm.recv_data(remote_source, local_dest, n); // sync return future(true); // return dummy future -#elif defined HAM_COMM_MPI +#elif defined(HAM_COMM_MPI) || defined(HAM_COMM_TCP) // allocate a request and construct a future future result(comm.allocate_request(remote_source.node())); // generate an offload message @@ -271,7 +271,6 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); comm.recv_result(result.get_request()); // trigger receiving the result // TODO(improvement): the recv_result() is not needed, could remove and remove send_result() from offload_read_msg to reduce synchronization overhead - return result; #elif defined HAM_COMM_MPI_RMA_DYNAMIC future result(comm.allocate_data_request(remote_source.node())); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b34c36e..8dbb21b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,11 @@ add_subdirectory(ham) ### Benchmarks ## Explicit targets (not built by default) + +# TCP benchmarks +add_executable(benchmark_ham_offload_tcp benchmark_ham_offload.cpp) +target_link_libraries(benchmark_ham_offload_tcp ham_offload_tcp) + # Intel LEO offload directive benchmark, requires Intel compiler if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") add_executable(benchmark_intel_leo EXCLUDE_FROM_ALL benchmark_intel_leo.cpp) @@ -36,7 +41,21 @@ endif() add_executable(active_msgs active_msgs.cpp) target_link_libraries(active_msgs ham_interface) +# TCP tests +add_executable(ham_offload_test_tcp ham_offload.cpp) +target_link_libraries(ham_offload_test_tcp ham_offload_tcp) + +add_executable(inner_product_tcp inner_product.cpp) +target_link_libraries(inner_product_tcp ham_offload_tcp) + +add_executable(test_data_transfer_tcp test_data_transfer.cpp) +target_link_libraries(test_data_transfer_tcp ham_offload_tcp) + +add_executable(test_argument_transfer_tcp test_argument_transfer.cpp) 
+target_link_libraries(test_argument_transfer_tcp ham_offload_tcp) + if (MPI_FOUND) +# two-sided MPI add_executable(ham_offload_test_mpi ham_offload.cpp) target_link_libraries(ham_offload_test_mpi ham_offload_mpi) @@ -52,6 +71,8 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) +# RMA MPI + add_executable(ham_offload_test_mpi_rma_dynamic ham_offload.cpp) target_link_libraries(ham_offload_test_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) @@ -63,6 +84,7 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi_rma_dynamic test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) + endif() if (SCIF_FOUND) diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index 278d452..cc0df10 100644 --- a/src/ham/CMakeLists.txt +++ b/src/ham/CMakeLists.txt @@ -8,7 +8,7 @@ cmake_minimum_required(VERSION 3.2 FATAL_ERROR) # TODO verfify # interface target for ham add_library(ham_interface INTERFACE) -target_compile_features(ham_interface INTERFACE cxx_auto_type cxx_range_for cxx_variadic_templates) +target_compile_features(ham_interface INTERFACE ) target_link_libraries(ham_interface INTERFACE noma_bmt boost_library) target_include_directories(ham_interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/../../include) @@ -20,6 +20,22 @@ set(HAM_LIB_SRC offload/offload.cpp util/cpu_affinity.cpp) +# TCP +add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE + net/communicator.cpp + net/communicator_tcp.cpp + offload/runtime.cpp + offload/offload.cpp + offload/main.cpp + util/cpu_affinity.cpp) +target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) +target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library) + +set_target_properties(ham_offload_tcp PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) + if (MPI_FOUND) add_library(ham_offload_mpi # SHARED if 
BUILD_SHARED_LIBS = TRUE ${HAM_LIB_SRC} diff --git a/src/ham/net/communicator_tcp.cpp b/src/ham/net/communicator_tcp.cpp new file mode 100644 index 0000000..e4e5dbd --- /dev/null +++ b/src/ham/net/communicator_tcp.cpp @@ -0,0 +1,9 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/net/communicator.hpp" + +ham::net::communicator* ham::net::communicator::instance_ = nullptr; + From 7f778bbc9d504b615ee5bab51ab9ee02e3994706 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 15:35:55 +0100 Subject: [PATCH 072/150] changed client connection --- include/ham/net/communicator_tcp.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 66d4a55..defa070 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -212,10 +212,12 @@ class communicator : public std::enable_shared_from_this { // targets init tcp connection to host if(!is_host()) { - tcp::socket sock(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target - peers[host_node_].tcp_socket = &sock; + tcp::socket sock(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target + peers[host_node_].tcp_socket = &sock; tcp::resolver resolver(io_context); - boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve({host_address_, host_port_})); + tcp::resolver::query query(tcp::v4(), host_address_, host_port_); + tcp::resolver::iterator iter = resolver.resolve(query); + boost::asio::connect(*peers[host_node_].tcp_socket, iter); // send requested rank to host HAM_DEBUG( HAM_LOG << 
"communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) From 9201b23dbabb4df8e30235176836e9bf7b55f904 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 15:53:22 +0100 Subject: [PATCH 073/150] changed client connection --- include/ham/net/communicator_tcp.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index defa070..09ace40 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -153,7 +153,7 @@ class communicator : public std::enable_shared_from_this { this_port_ = 0; // tcp port used for this node host_node_ = 0; // host node host_address_ = "empty"; // host IP address or resolvable name - host_port_ = 0; // host port + host_port_ = "empty"; // host port // command line options @@ -215,9 +215,9 @@ class communicator : public std::enable_shared_from_this { tcp::socket sock(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target peers[host_node_].tcp_socket = &sock; tcp::resolver resolver(io_context); - tcp::resolver::query query(tcp::v4(), host_address_, host_port_); - tcp::resolver::iterator iter = resolver.resolve(query); - boost::asio::connect(*peers[host_node_].tcp_socket, iter); + //tcp::resolver::query query(tcp::v4(), host_address_, host_port_); + //tcp::resolver::iterator iter = resolver.resolve(query); + boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve(&host_address_, &host_port_)); // send requested rank to host HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) @@ -472,7 +472,7 @@ class communicator : public std::enable_shared_from_this { int this_port_; node_t host_node_; std::string host_address_; - int host_port_; + std::string host_port_; node_descriptor 
node_desc_dummy; boost::asio::io_service io_context; From f894ec1c8f66a7c6affc30f0cea9a8964fb9d0e2 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:21:07 +0100 Subject: [PATCH 074/150] bla --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 09ace40..d6805df 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -217,7 +217,7 @@ class communicator : public std::enable_shared_from_this { tcp::resolver resolver(io_context); //tcp::resolver::query query(tcp::v4(), host_address_, host_port_); //tcp::resolver::iterator iter = resolver.resolve(query); - boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve(&host_address_, &host_port_)); + boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve(host_address_, host_port_)); // send requested rank to host HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) From da62e5901f8351149f8bdfa7617f1b892e643d1a Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:25:01 +0100 Subject: [PATCH 075/150] bla --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index d6805df..bf8fe99 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -236,7 +236,7 @@ class communicator : public std::enable_shared_from_this { tcp::acceptor acc(io_context, endpoint); node_t req_ranks[nodes_]; // store requested ranks in order of connection - tcp::socket temp_socks[nodes_]; // store sockets temporarily in connection order + tcp::socket* temp_socks = new socket[node_](io_context); // store sockets temporarily in connection order bool taken_ranks[nodes_] {false}; 
taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) From 1d8bb99a6971418dc07bc35da94f171af9d403f6 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:26:33 +0100 Subject: [PATCH 076/150] bla --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index bf8fe99..4cbd8c4 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -236,7 +236,7 @@ class communicator : public std::enable_shared_from_this { tcp::acceptor acc(io_context, endpoint); node_t req_ranks[nodes_]; // store requested ranks in order of connection - tcp::socket* temp_socks = new socket[node_](io_context); // store sockets temporarily in connection order + tcp::socket* temp_socks = new tcp::socket[node_](io_context); // store sockets temporarily in connection order bool taken_ranks[nodes_] {false}; taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) From 41b9a7f5d84edba739dc877bca9afc44eef5ee16 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:29:30 +0100 Subject: [PATCH 077/150] bla --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4cbd8c4..5de053b 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -236,7 +236,7 @@ class communicator : public std::enable_shared_from_this { tcp::acceptor acc(io_context, endpoint); node_t req_ranks[nodes_]; // store requested ranks in order of connection - tcp::socket* temp_socks = new tcp::socket[node_](io_context); // store sockets temporarily in connection order + tcp::socket* temp_socks = new tcp::socket[nodes_](io_context); // store 
sockets temporarily in connection order bool taken_ranks[nodes_] {false}; taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) From 5b2df19cc783ecb850ffb76749c7187c3228a52d Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:39:50 +0100 Subject: [PATCH 078/150] bla --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 5de053b..cb3745a 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -262,7 +262,7 @@ class communicator : public std::enable_shared_from_this { } else { node_t rrank = req_ranks[j]; HAM_DEBUG( HAM_LOG << "communicator::communicator(): connection " << j << " requested ham-address: " << rrank << std::endl; ) - peers[rrank].tcp_socket = std::move(temp_socks[j]); // = move https://www.boost.org/doc/libs/1_65_0/doc/html/boost_asio/reference/basic_stream_socket/operator_eq_.html + peers[rrank].tcp_socket = &std::move(temp_socks[j]); // = move https://www.boost.org/doc/libs/1_65_0/doc/html/boost_asio/reference/basic_stream_socket/operator_eq_.html taken_ranks[rrank] = true; // mark the requested rank as taken HAM_DEBUG( HAM_LOG << "communicator::communicator(): associated ham-address: " << rrank << " with connection " << j << std::endl; ) // send assigned rank to target @@ -302,7 +302,7 @@ class communicator : public std::enable_shared_from_this { // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); - std::thread thread([&io_context](){ io_context.run(); }); + std::thread thread([this](){ io_context.run(); }); } From c708644c635f8b893e24c50c5ba5c644b35db515 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 17:47:14 +0100 Subject: [PATCH 079/150] bla --- 
include/ham/net/communicator_tcp.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index cb3745a..98d8bc0 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -212,8 +212,8 @@ class communicator : public std::enable_shared_from_this { // targets init tcp connection to host if(!is_host()) { - tcp::socket sock(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target - peers[host_node_].tcp_socket = &sock; + tcp::socket sock = new tcp::socket(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target + peers[host_node_].tcp_socket = sock; tcp::resolver resolver(io_context); //tcp::resolver::query query(tcp::v4(), host_address_, host_port_); //tcp::resolver::iterator iter = resolver.resolve(query); @@ -266,7 +266,7 @@ class communicator : public std::enable_shared_from_this { taken_ranks[rrank] = true; // mark the requested rank as taken HAM_DEBUG( HAM_LOG << "communicator::communicator(): associated ham-address: " << rrank << " with connection " << j << std::endl; ) // send assigned rank to target - boost::asio::write(peers[rrank].tcp_socket, boost::asio::buffer((void*)&rrank, sizeof(rrank))); + boost::asio::write(*peers[rrank].tcp_socket, boost::asio::buffer((void*)&rrank, sizeof(rrank))); } } @@ -279,7 +279,7 @@ class communicator : public std::enable_shared_from_this { HAM_DEBUG( HAM_LOG << "communicator::communicator(): associating wildcard connection: " << k << " with ham-address " << i << std::endl; ) peers[i].tcp_socket = temp_socks[k]; taken_ranks[i] = true; - boost::asio::write(peers[i].tcp_socket, boost::asio::buffer((void*)&i, sizeof(i))); + boost::asio::write(*peers[i].tcp_socket, boost::asio::buffer((void*)&i, sizeof(i))); break; // 
stop if free rank is assigned, go back to k-loop for next wildcard connection } } @@ -484,7 +484,7 @@ class communicator : public std::enable_shared_from_this { detail::resource_pool buffer_pool; // tcp socket - tcp::socket* tcp_socket; + tcp::socket tcp_socket; }; tcp_peer* peers; From c423ad096936376e14d3d4553383e1094f3cb8a5 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:09:35 +0100 Subject: [PATCH 080/150] bla --- include/ham/net/communicator_tcp.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 98d8bc0..a7d7b8f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -236,12 +236,12 @@ class communicator : public std::enable_shared_from_this { tcp::acceptor acc(io_context, endpoint); node_t req_ranks[nodes_]; // store requested ranks in order of connection - tcp::socket* temp_socks = new tcp::socket[nodes_](io_context); // store sockets temporarily in connection order + tcp::socket* temp_socks[nodes_]; // store sockets temporarily in connection order bool taken_ranks[nodes_] {false}; taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) for(int i=1; i < nodes_; i++) { - temp_socks[i] = acc.accept(); // accept connection + acc.accept(temp_socks[i]); // accept connection // recv rank boost::asio::read(temp_socks[i], boost::asio::buffer((void *) &req_ranks[i], sizeof(node_t))); @@ -484,7 +484,7 @@ class communicator : public std::enable_shared_from_this { detail::resource_pool buffer_pool; // tcp socket - tcp::socket tcp_socket; + tcp::socket* tcp_socket; }; tcp_peer* peers; From e2e35df574badc9313279b3c25f6ecabb8b9041d Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:14:30 +0100 Subject: [PATCH 081/150] bla --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index a7d7b8f..479e33e 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -212,7 +212,7 @@ class communicator : public std::enable_shared_from_this { // targets init tcp connection to host if(!is_host()) { - tcp::socket sock = new tcp::socket(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target + tcp::socket* sock = new tcp::socket(io_context); // socket is always stored with index = target node, so no "if_host" switching is necessary for functions executed on host and target peers[host_node_].tcp_socket = sock; tcp::resolver resolver(io_context); //tcp::resolver::query query(tcp::v4(), host_address_, host_port_); @@ -241,7 +241,7 @@ class communicator : public std::enable_shared_from_this { taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) for(int i=1; i < nodes_; i++) { - acc.accept(temp_socks[i]); // accept connection + acc.accept(*temp_socks[i]); // accept connection // recv rank boost::asio::read(temp_socks[i], boost::asio::buffer((void *) &req_ranks[i], sizeof(node_t))); From 50eb7cc650539fabc63f81456c85fe2e7be93131 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:17:06 +0100 Subject: [PATCH 082/150] bla --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 479e33e..1d07be3 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -244,7 +244,7 @@ class communicator : public std::enable_shared_from_this { acc.accept(*temp_socks[i]); // accept connection // recv rank - boost::asio::read(temp_socks[i], boost::asio::buffer((void *) &req_ranks[i], 
sizeof(node_t))); + boost::asio::read(*temp_socks[i], boost::asio::buffer((void *) &req_ranks[i], sizeof(node_t))); } // rearrange sockets and inform targets of resulting rank @@ -262,7 +262,7 @@ class communicator : public std::enable_shared_from_this { } else { node_t rrank = req_ranks[j]; HAM_DEBUG( HAM_LOG << "communicator::communicator(): connection " << j << " requested ham-address: " << rrank << std::endl; ) - peers[rrank].tcp_socket = &std::move(temp_socks[j]); // = move https://www.boost.org/doc/libs/1_65_0/doc/html/boost_asio/reference/basic_stream_socket/operator_eq_.html + peers[rrank].tcp_socket = temp_socks[j]; // = move https://www.boost.org/doc/libs/1_65_0/doc/html/boost_asio/reference/basic_stream_socket/operator_eq_.html taken_ranks[rrank] = true; // mark the requested rank as taken HAM_DEBUG( HAM_LOG << "communicator::communicator(): associated ham-address: " << rrank << " with connection " << j << std::endl; ) // send assigned rank to target From 1828a78d86a303ce47a3e962e908602c23d863fb Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:20:30 +0100 Subject: [PATCH 083/150] bla --- include/ham/net/communicator_tcp.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1d07be3..1bc5d1d 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -221,9 +221,9 @@ class communicator : public std::enable_shared_from_this { // send requested rank to host HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) - boost::asio::write(peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); + boost::asio::write(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); // recv rank from host - boost::asio::read(peers[host_node_].tcp_socket, 
boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); + boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); HAM_DEBUG( HAM_LOG << "communicator::communicator(): received ham-address " << this_node_ << "from host" << std::endl; ) } @@ -352,7 +352,7 @@ class communicator : public std::enable_shared_from_this { // tcp write auto self(shared_from_this()); - boost::asio::async_write(peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), + boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), [this, self, &req](boost::system::error_code ec, size_t length) { req.sent_ = true; } @@ -366,7 +366,7 @@ class communicator : public std::enable_shared_from_this { { static msg_buffer buffer; // NOTE ! // MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - boost::asio::read(peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); + boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); return static_cast(&buffer); } @@ -375,7 +375,7 @@ class communicator : public std::enable_shared_from_this { template void send_result(node_t target_node, T* message, size_t size) { - boost::asio::write(peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); + boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); } // trigger receiving the result of an active message on the host @@ -383,7 +383,7 @@ class communicator : public std::enable_shared_from_this { { // tcp receive auto self(shared_from_this()); - boost::asio::async_read(peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), + boost::asio::async_read(*peers[req.target_node].tcp_socket, 
boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), [this, self, &req](boost::system::error_code ec, size_t length) { req.received_ = true; } @@ -397,7 +397,7 @@ class communicator : public std::enable_shared_from_this { { // tcp send - boost::asio::write(peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size * sizeof(T))); + boost::asio::write(*peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size * sizeof(T))); // MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); } @@ -406,7 +406,7 @@ class communicator : public std::enable_shared_from_this { void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { auto self(shared_from_this()); - boost::asio::async_write(peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size*sizeof(T)), + boost::asio::async_write(*peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size*sizeof(T)), [this, self, &req](boost::system::error_code ec, size_t length) { req.sent_ = true; } From df30d5b12a37c92a9bfb687228842c6a80d524b7 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:23:38 +0100 Subject: [PATCH 084/150] bla --- include/ham/offload/offload_msg.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 6e709a4..01f4e9d 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -6,7 +6,9 @@ #ifndef ham_offload_offload_msg_hpp #define ham_offload_offload_msg_hpp +#ifdef HAM_COMM_MPI_RMA_DYNAMIC #include +#endif #include "ham/msg/active_msg.hpp" #include "ham/msg/execution_policy.hpp" #include "ham/misc/constants.hpp" From 2b778edc8913e566458132ec5c452650296e26ae Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 
31 Oct 2018 18:27:53 +0100 Subject: [PATCH 085/150] fixed spelling mistake --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1bc5d1d..4934e9d 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -419,7 +419,7 @@ class communicator : public std::enable_shared_from_this { void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { // tcp recv - boost::asio::read(peers[remote_source.node()].tpc_socket, boost::asio::buffer((void*)local_dest, size * sizeof(T))); + boost::asio::read(peers[remote_source.node()].tcp_socket, boost::asio::buffer((void*)local_dest, size * sizeof(T))); // MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } From be3c8ccfeae6aeda02f769cb4faa1592e0385244 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:29:49 +0100 Subject: [PATCH 086/150] fixed spelling mistake --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4934e9d..772578f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -419,7 +419,7 @@ class communicator : public std::enable_shared_from_this { void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { // tcp recv - boost::asio::read(peers[remote_source.node()].tcp_socket, boost::asio::buffer((void*)local_dest, size * sizeof(T))); + boost::asio::read(*peers[remote_source.node()].tcp_socket, boost::asio::buffer((void*)local_dest, size * sizeof(T))); // MPI_Recv((void*)local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), constants::DATA_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } @@ -428,7 +428,7 @@ class communicator : public 
std::enable_shared_from_this { void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { auto self(shared_from_this()); - boost::asio::async_read(peers[remote_source.node()].tpc_socket, boost::asio::buffer(static_cast(local_dest), size*sizeof(T)), + boost::asio::async_read(*peers[remote_source.node()].tcp_socket, boost::asio::buffer(static_cast(local_dest), size*sizeof(T)), [this, self, &req](boost::system::error_code ec, size_t length) { req.received_ = true; } From a9408d37fd2f21341b4e106d551733102e866003 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:51:25 +0100 Subject: [PATCH 087/150] =?UTF-8?q?trying=20something=20dumb=20to=20preven?= =?UTF-8?q?t=20a=C3=B6ready=20open=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/ham/net/communicator_tcp.hpp | 1 + src/ham/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 772578f..bda7c0e 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -241,6 +241,7 @@ class communicator : public std::enable_shared_from_this { taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) for(int i=1; i < nodes_; i++) { + temp_socks->close(); acc.accept(*temp_socks[i]); // accept connection // recv rank diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index cc0df10..1652e1c 100644 --- a/src/ham/CMakeLists.txt +++ b/src/ham/CMakeLists.txt @@ -29,7 +29,7 @@ add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE offload/main.cpp util/cpu_affinity.cpp) target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) -target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library) +target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library 
pthread) set_target_properties(ham_offload_tcp PROPERTIES CXX_STANDARD 11 From 866de30fdcc899ab9395c24cea05f12451b38861 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:54:47 +0100 Subject: [PATCH 088/150] =?UTF-8?q?trying=20something=20dumb=20to=20preven?= =?UTF-8?q?t=20a=C3=B6ready=20open=20error?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index bda7c0e..1dffd27 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -241,7 +241,7 @@ class communicator : public std::enable_shared_from_this { taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) for(int i=1; i < nodes_; i++) { - temp_socks->close(); + temp_socks[i]->close(); acc.accept(*temp_socks[i]); // accept connection // recv rank From 3c00f53c971e910f5a56dbbfcb89c9fc6d47bcdc Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 18:58:17 +0100 Subject: [PATCH 089/150] trying something less dumb to prevent already open error --- include/ham/net/communicator_tcp.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1dffd27..436aba0 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -237,11 +237,15 @@ class communicator : public std::enable_shared_from_this { node_t req_ranks[nodes_]; // store requested ranks in order of connection tcp::socket* temp_socks[nodes_]; // store sockets temporarily in connection order + for (int l = 1; l < nodes_; ++l) { + temp_socks[l] = new tcp::socket(io_context); + } + bool taken_ranks[nodes_] {false}; taken_ranks[0] = true; // host rank has to be correctly 
provided and is therefore already taken (by the executing process) for(int i=1; i < nodes_; i++) { - temp_socks[i]->close(); + // temp_socks[i]->close(); acc.accept(*temp_socks[i]); // accept connection // recv rank From c4cefff27ef4e073e3be7ac058b47b4714dbf7fa Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 19:03:49 +0100 Subject: [PATCH 090/150] fixed check for illegal rank request --- include/ham/net/communicator_tcp.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 436aba0..ac136ab 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -220,7 +220,7 @@ class communicator : public std::enable_shared_from_this { boost::asio::connect(*peers[host_node_].tcp_socket, resolver.resolve(host_address_, host_port_)); // send requested rank to host - HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << "from host" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): requesting ham-address " << this_node_ << " from host" << std::endl; ) boost::asio::write(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); // recv rank from host boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); @@ -254,8 +254,8 @@ class communicator : public std::enable_shared_from_this { // rearrange sockets and inform targets of resulting rank for (int j = 1; j < nodes_; ++j) { - if(req_ranks[j] < -1 || req_ranks[j] > nodes_-1) { // check if rank invalid - std::cout << "communicator::communicator(): illegal ham-address requested:" << req_ranks[j] << std::endl; + if((req_ranks[j] < -1) || (req_ranks[j] > nodes_-1)) { // check if rank invalid + std::cout << "communicator::communicator(): illegal ham-address requested: " << req_ranks[j] << std::endl; exit(-1); }else if(req_ranks[j] 
== -1) { // skip wildcard ranks, handled later to avoid conflicting ranks with following connects HAM_DEBUG( HAM_LOG << "communicator::communicator(): connection " << j << " requested wildcard ham-address" << std::endl; ) From 63736e06d207809ceb3f0b6ee293ca72d1e5e6e9 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 19:12:20 +0100 Subject: [PATCH 091/150] fixed check for illegal rank request --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index ac136ab..a6b012f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -254,7 +254,7 @@ class communicator : public std::enable_shared_from_this { // rearrange sockets and inform targets of resulting rank for (int j = 1; j < nodes_; ++j) { - if((req_ranks[j] < -1) || (req_ranks[j] > nodes_-1)) { // check if rank invalid + if((req_ranks[j] < -1) || (req_ranks[j] > (nodes_-1))) { // check if rank invalid std::cout << "communicator::communicator(): illegal ham-address requested: " << req_ranks[j] << std::endl; exit(-1); }else if(req_ranks[j] == -1) { // skip wildcard ranks, handled later to avoid conflicting ranks with following connects From 4a9cf736ecfd5cd4f55cf1bacef6c13a25c5bcf9 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 19:18:05 +0100 Subject: [PATCH 092/150] fixed check for illegal rank request --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index a6b012f..892ff22 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -254,7 +254,7 @@ class communicator : public std::enable_shared_from_this { // rearrange sockets and inform targets of resulting rank for (int j = 1; j < nodes_; ++j) { - if((req_ranks[j] < -1) || (req_ranks[j] > 
(nodes_-1))) { // check if rank invalid + if((req_ranks[j] > (nodes_-1))) { // check if rank invalid std::cout << "communicator::communicator(): illegal ham-address requested: " << req_ranks[j] << std::endl; exit(-1); }else if(req_ranks[j] == -1) { // skip wildcard ranks, handled later to avoid conflicting ranks with following connects From 4afa1568cfb5eea462517312943064b3b582d586 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 19:28:06 +0100 Subject: [PATCH 093/150] fixed check for illegal rank request (kinda) --- include/ham/net/communicator_tcp.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 892ff22..e99568a 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -224,7 +224,7 @@ class communicator : public std::enable_shared_from_this { boost::asio::write(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); // recv rank from host boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)&this_node_, sizeof(this_node_))); - HAM_DEBUG( HAM_LOG << "communicator::communicator(): received ham-address " << this_node_ << "from host" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): received ham-address " << this_node_ << " from host" << std::endl; ) } // host accepts tcp connection from targets @@ -254,7 +254,7 @@ class communicator : public std::enable_shared_from_this { // rearrange sockets and inform targets of resulting rank for (int j = 1; j < nodes_; ++j) { - if((req_ranks[j] > (nodes_-1))) { // check if rank invalid + if((req_ranks[j] > (nodes_-1))) { // check if rank invalid // TODO: fix -1 wildcard, currently not possible because req_ranks is unsigned node_t=size_t std::cout << "communicator::communicator(): illegal ham-address requested: " << req_ranks[j] << std::endl; exit(-1); }else if(req_ranks[j] == -1) { 
// skip wildcard ranks, handled later to avoid conflicting ranks with following connects @@ -305,9 +305,13 @@ class communicator : public std::enable_shared_from_this { } } + HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) + // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); std::thread thread([this](){ io_context.run(); }); + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) } From abd32cf979ed0f3f570223fe0c7e32e804b7b8b8 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 31 Oct 2018 19:50:05 +0100 Subject: [PATCH 094/150] fixed async thread terminating early (wip) --- include/ham/net/communicator_tcp.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index e99568a..528978f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -310,6 +310,7 @@ class communicator : public std::enable_shared_from_this { // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); std::thread thread([this](){ io_context.run(); }); + thread.detach(); HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) } From d053cf78c40c40fe032e6cf55da5d81d15124c01 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 17:26:42 +0100 Subject: [PATCH 095/150] removed shared pointers --- include/ham/net/communicator_tcp.hpp | 28 ++++++++++++++++------------ src/benchmark_ham_offload.cpp | 6 +++++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 528978f..6588319 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -10,6 +10,7 @@ 
#include // memcpy #include // posix_memalign #include // async thread +// #include // std::shared_ptr #include #include @@ -60,7 +61,7 @@ class node_descriptor friend class net::communicator; }; -class communicator : public std::enable_shared_from_this { +class communicator { // : public std::enable_shared_from_this public: // externally used interface of request must be shared across all communicator-implementations class request { @@ -361,9 +362,9 @@ class communicator : public std::enable_shared_from_this { memcpy(msg_buffer, msg, size); // tcp write - auto self(shared_from_this()); + // auto self(shared_from_this()); boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), - [this, self, &req](boost::system::error_code ec, size_t length) { + [&req](boost::system::error_code ec, size_t length) { req.sent_ = true; } ); @@ -380,6 +381,7 @@ class communicator : public std::enable_shared_from_this { return static_cast(&buffer); } + // target only -> sync // send result through communicator // only to be used by request.send_result() template @@ -388,13 +390,14 @@ class communicator : public std::enable_shared_from_this { boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); } + // host only -> async // trigger receiving the result of an active message on the host void recv_result(request_reference_type req) { // tcp receive - auto self(shared_from_this()); + // auto self(shared_from_this()); boost::asio::async_read(*peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), - [this, self, &req](boost::system::error_code ec, size_t length) { + [&req](boost::system::error_code ec, size_t length) { req.received_ = true; } ); @@ -402,29 +405,30 @@ class communicator : public std::enable_shared_from_this { return; } + // target only, host never uses sync variant template void send_data(T* 
local_source, buffer_ptr remote_dest, size_t size) { // tcp send - boost::asio::write(*peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size * sizeof(T))); + // MPI_Send((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD); } - // to be used by the host + // host only template void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { - auto self(shared_from_this()); + // auto self(shared_from_this()); boost::asio::async_write(*peers[remote_dest.node()].tcp_socket, boost::asio::buffer((void*)local_source, size*sizeof(T)), - [this, self, &req](boost::system::error_code ec, size_t length) { + [&req](boost::system::error_code ec, size_t length) { req.sent_ = true; } ); // MPI_Isend((void*)local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), constants::DATA_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); } - + // target only template void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) { @@ -437,9 +441,9 @@ class communicator : public std::enable_shared_from_this { template void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { - auto self(shared_from_this()); + // auto self(shared_from_this()); boost::asio::async_read(*peers[remote_source.node()].tcp_socket, boost::asio::buffer(static_cast(local_dest), size*sizeof(T)), - [this, self, &req](boost::system::error_code ec, size_t length) { + [&req](boost::system::error_code ec, size_t length) { req.received_ = true; } ); diff --git a/src/benchmark_ham_offload.cpp b/src/benchmark_ham_offload.cpp index 3e55ec7..3b56e3a 100644 --- a/src/benchmark_ham_offload.cpp +++ b/src/benchmark_ham_offload.cpp @@ -165,7 +165,11 @@ int main(int argc, char * argv[]) #else std::cout << "# COMM_MPI_RMA_DYNAMIC disabled" << std::endl; #endif - + #ifdef HAM_COMM_TCP + std::cout << "# COMM_TCP enabled" << std::endl; + #else + std::cout << "# 
COMM_TCP disabled" << std::endl; +#endif #ifdef HAM_COMM_SCIF std::cout << "# HAM_COMM_SCIF enabled" << std::endl; #ifdef HAM_SCIF_RMA_CPU From 2721d16e83495ce7fd587da7da6c0b6333fef320 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 17:36:37 +0100 Subject: [PATCH 096/150] changed std::thread invocation --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 6588319..4e2377d 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -310,7 +310,7 @@ class communicator { // : public std::enable_shared_from_this // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); - std::thread thread([this](){ io_context.run(); }); + std::thread thread([this, &io_context](){ io_context.run(); }); thread.detach(); HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) From 2581f11d0077e6a2f10ae48f35162731419d89d4 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 17:55:47 +0100 Subject: [PATCH 097/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4e2377d..4548f9c 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -310,7 +310,12 @@ class communicator { // : public std::enable_shared_from_this // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); - std::thread thread([this, &io_context](){ io_context.run(); }); + std::thread thread([this](){ + HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." 
<< std::endl; ) + io_context.run(); + HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" << std::endl; ) + } + ); thread.detach(); HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) From 4d88a32032bc947f522da71e9d3b3d4ef9f4cc3e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 17:59:57 +0100 Subject: [PATCH 098/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4548f9c..cf109a7 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -309,9 +309,10 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - boost::asio::io_service::work work(io_context); + std::thread thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) + boost::asio::io_service::work work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } From 08b774c31f5af321ae502560ce1f562b62a2d563 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:07:25 +0100 Subject: [PATCH 099/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index cf109a7..8099735 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -370,8 +370,9 @@ class communicator { // : public std::enable_shared_from_this // tcp write // auto self(shared_from_this()); boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), - [&req](boost::system::error_code ec, size_t length) { + [this, &req](boost::system::error_code ec, size_t length) { req.sent_ = true; + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed" << std::endl; ) } ); // MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); @@ -403,7 +404,7 @@ class communicator { // : public std::enable_shared_from_this // tcp receive // auto self(shared_from_this()); boost::asio::async_read(*peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), - [&req](boost::system::error_code ec, size_t length) { + [this, &req](boost::system::error_code ec, size_t length) { req.received_ = true; } ); From fd02c36483a1fbe2ced17d138543ad2242ae51df Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:10:59 +0100 Subject: [PATCH 100/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 8099735..9b25336 100644 --- a/include/ham/net/communicator_tcp.hpp +++ 
b/include/ham/net/communicator_tcp.hpp @@ -369,10 +369,12 @@ class communicator { // : public std::enable_shared_from_this // tcp write // auto self(shared_from_this()); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): sending msg to: " << req.target_node << std::endl; ) + boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), [this, &req](boost::system::error_code ec, size_t length) { req.sent_ = true; - HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed" << std::endl; ) + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() completed" << std::endl; ) } ); // MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); From 95095eab73bfe2e1371aa7512b98999b356780cd Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:13:45 +0100 Subject: [PATCH 101/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 9b25336..bb79915 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -386,6 +386,7 @@ class communicator { // : public std::enable_shared_from_this { static msg_buffer buffer; // NOTE ! 
// MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + HAM_DEBUG( HAM_LOG << "communicator::recv_msg_host(): node " << this_node_ << " awaiting AM from host" << std::endl; ) boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); return static_cast(&buffer); } From cd4f43148fd0d0ec4b2d405de305249edafca233 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:26:20 +0100 Subject: [PATCH 102/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index bb79915..d553695 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -406,9 +406,12 @@ class communicator { // : public std::enable_shared_from_this { // tcp receive // auto self(shared_from_this()); + HAM_DEBUG( HAM_LOG << "communicator::recv_result(): receiving msg from: " << req.target_node << std::endl; ) + boost::asio::async_read(*peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), [this, &req](boost::system::error_code ec, size_t length) { req.received_ = true; + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, recv_result() completed " << req.target_node << std::endl; ) } ); // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); From 3996db0975f1e1f0e2927a121b57fcceae724104 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:34:34 +0100 Subject: [PATCH 103/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp 
b/include/ham/net/communicator_tcp.hpp index d553695..5fc749f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -374,7 +374,7 @@ class communicator { // : public std::enable_shared_from_this boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), [this, &req](boost::system::error_code ec, size_t length) { req.sent_ = true; - HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() completed" << std::endl; ) + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() to " << req.target_node << " completed" << std::endl; ) } ); // MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); From 354e1d2b712457889532f7c65f1e95f5fec11eed Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:41:37 +0100 Subject: [PATCH 104/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 5fc749f..ff53282 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -373,8 +373,13 @@ class communicator { // : public std::enable_shared_from_this boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), [this, &req](boost::system::error_code ec, size_t length) { - req.sent_ = true; - HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() to " << req.target_node << " completed" << std::endl; ) + if (!ec) + { + req.sent_ = true; + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() to " << req.target_node << " completed" << std::endl; ) + } else { + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, failed to send_msg() to " << req.target_node << " Error: " << 
ec.message() << std::endl; ) + } } ); // MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); From aa18deb421de331110302364471e8227a85e6ff8 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:44:38 +0100 Subject: [PATCH 105/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index ff53282..b0536cb 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -376,7 +376,7 @@ class communicator { // : public std::enable_shared_from_this if (!ec) { req.sent_ = true; - HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() to " << req.target_node << " completed" << std::endl; ) + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, send_msg() to " << req.target_node << " completed. Wrote " << length << " Bytes." << std::endl; ) } else { HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, failed to send_msg() to " << req.target_node << " Error: " << ec.message() << std::endl; ) } From 7244c92e7de8e0dd69550f64016d2b0a640d5269 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 18:53:33 +0100 Subject: [PATCH 106/150] debugging async ops not completing --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index b0536cb..1171fbd 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -392,7 +392,7 @@ class communicator { // : public std::enable_shared_from_this static msg_buffer buffer; // NOTE ! 
// MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); HAM_DEBUG( HAM_LOG << "communicator::recv_msg_host(): node " << this_node_ << " awaiting AM from host" << std::endl; ) - boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); + boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, 72 /*size*/)); return static_cast(&buffer); } From 922e9daf4ddecdf5a5d965941cdc5fd8294aa76d Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:34:34 +0100 Subject: [PATCH 107/150] adding delimiter to AM transfers --- include/ham/misc/constants.hpp | 1 + include/ham/net/communicator_tcp.hpp | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/ham/misc/constants.hpp b/include/ham/misc/constants.hpp index b37c690..fba5c4c 100644 --- a/include/ham/misc/constants.hpp +++ b/include/ham/misc/constants.hpp @@ -19,6 +19,7 @@ enum net { MSG_BUFFERS = 256, DATA_PUT_CODE = 1, DATA_GET_CODE = 2, + TCP_DELIM = "\r\n\r\n", }; enum arch { diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1171fbd..aabd741 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -142,7 +142,7 @@ class communicator { // : public std::enable_shared_from_this typedef request& request_reference_type; typedef const request& request_const_reference_type; - communicator(int argc, char* argv[]) : node_desc_dummy() + communicator(int argc, char* argv[]) : node_desc_dummy(), delim(constants::TCP_DELIM) { HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising configuration" << std::endl; ) @@ -366,12 +366,13 @@ class communicator { // : public std::enable_shared_from_this // copy message from caller into transfer buffer void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); memcpy(msg_buffer, msg, size); + memcpy(msg_buffer+size, 
delim.c_str(), delim.size()); // add tcp delimiter to message is defined in // tcp write // auto self(shared_from_this()); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): sending msg to: " << req.target_node << std::endl; ) - boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size), + boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size+delim.size()), [this, &req](boost::system::error_code ec, size_t length) { if (!ec) { @@ -392,7 +393,7 @@ class communicator { // : public std::enable_shared_from_this static msg_buffer buffer; // NOTE ! // MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); HAM_DEBUG( HAM_LOG << "communicator::recv_msg_host(): node " << this_node_ << " awaiting AM from host" << std::endl; ) - boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, 72 /*size*/)); + boost::asio::read_until(*peers[host_node_].tcp_socket, &buffer, delim); return static_cast(&buffer); } @@ -507,6 +508,7 @@ class communicator { // : public std::enable_shared_from_this std::string host_port_; node_descriptor node_desc_dummy; boost::asio::io_service io_context; + const std::string delim; struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 2878033412ff77f2320122025c21cbc8737a14c0 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:38:30 +0100 Subject: [PATCH 108/150] adding delimiter to AM transfers --- include/ham/misc/constants.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/ham/misc/constants.hpp b/include/ham/misc/constants.hpp index fba5c4c..9f9fa70 100644 --- a/include/ham/misc/constants.hpp +++ b/include/ham/misc/constants.hpp @@ -10,6 +10,9 @@ #ifndef HAM_MESSAGE_SIZE #define HAM_MESSAGE_SIZE 4096 #endif +#ifndef HAM_TCP_DELIM +#define HAM_TCP_DELIM "\r\n\r\n" +#endif namespace ham { namespace constants { 
@@ -19,7 +22,6 @@ enum net { MSG_BUFFERS = 256, DATA_PUT_CODE = 1, DATA_GET_CODE = 2, - TCP_DELIM = "\r\n\r\n", }; enum arch { From fa33ca55bbd78419c6b0fc2ff9e6c20cdc8aa99e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:39:08 +0100 Subject: [PATCH 109/150] adding delimiter to AM transfers --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index aabd741..4e24b49 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -142,7 +142,7 @@ class communicator { // : public std::enable_shared_from_this typedef request& request_reference_type; typedef const request& request_const_reference_type; - communicator(int argc, char* argv[]) : node_desc_dummy(), delim(constants::TCP_DELIM) + communicator(int argc, char* argv[]) : node_desc_dummy(), delim(HAM_TCP_DELIM) { HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising configuration" << std::endl; ) From d520e2cf4fc1717d1af658a8b15ebfcff8c7c465 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:41:44 +0100 Subject: [PATCH 110/150] adding delimiter to AM transfers --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4e24b49..1bba878 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -393,7 +393,7 @@ class communicator { // : public std::enable_shared_from_this static msg_buffer buffer; // NOTE ! 
// MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); HAM_DEBUG( HAM_LOG << "communicator::recv_msg_host(): node " << this_node_ << " awaiting AM from host" << std::endl; ) - boost::asio::read_until(*peers[host_node_].tcp_socket, &buffer, delim); + boost::asio::read_until(*peers[host_node_].tcp_socket, buffer, delim); return static_cast(&buffer); } From 5fdba9fdc0392d01af0e8c86a888cb2fca32babd Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:48:28 +0100 Subject: [PATCH 111/150] removed delimiters, just send full MSG_SIZE --- include/ham/misc/constants.hpp | 3 --- include/ham/net/communicator_tcp.hpp | 12 +++++------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/include/ham/misc/constants.hpp b/include/ham/misc/constants.hpp index 9f9fa70..b37c690 100644 --- a/include/ham/misc/constants.hpp +++ b/include/ham/misc/constants.hpp @@ -10,9 +10,6 @@ #ifndef HAM_MESSAGE_SIZE #define HAM_MESSAGE_SIZE 4096 #endif -#ifndef HAM_TCP_DELIM -#define HAM_TCP_DELIM "\r\n\r\n" -#endif namespace ham { namespace constants { diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1bba878..87baee7 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -142,7 +142,7 @@ class communicator { // : public std::enable_shared_from_this typedef request& request_reference_type; typedef const request& request_const_reference_type; - communicator(int argc, char* argv[]) : node_desc_dummy(), delim(HAM_TCP_DELIM) + communicator(int argc, char* argv[]) : node_desc_dummy() { HAM_DEBUG( HAM_LOG << "communicator::communicator(): initialising configuration" << std::endl; ) @@ -366,13 +366,12 @@ class communicator { // : public std::enable_shared_from_this // copy message from caller into transfer buffer void* msg_buffer = static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); memcpy(msg_buffer, msg, size); - 
memcpy(msg_buffer+size, delim.c_str(), delim.size()); // add tcp delimiter to message is defined in // tcp write // auto self(shared_from_this()); HAM_DEBUG( HAM_LOG << "communicator::send_msg(): sending msg to: " << req.target_node << std::endl; ) - - boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, size+delim.size()), + //always write full message size TODO(improvement): improve with delimiter and read_until @ target + boost::asio::async_write(*peers[req.target_node].tcp_socket, boost::asio::buffer(msg_buffer, constants::MSG_SIZE), [this, &req](boost::system::error_code ec, size_t length) { if (!ec) { @@ -393,7 +392,7 @@ class communicator { // : public std::enable_shared_from_this static msg_buffer buffer; // NOTE ! // MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); HAM_DEBUG( HAM_LOG << "communicator::recv_msg_host(): node " << this_node_ << " awaiting AM from host" << std::endl; ) - boost::asio::read_until(*peers[host_node_].tcp_socket, buffer, delim); + boost::asio::read(*peers[host_node_].tcp_socket, boost::asio::buffer(&buffer, size)); // will always read full MSG_SIZE return static_cast(&buffer); } @@ -508,8 +507,7 @@ class communicator { // : public std::enable_shared_from_this std::string host_port_; node_descriptor node_desc_dummy; boost::asio::io_service io_context; - const std::string delim; - + struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 77173ff6ed7cbff818bbcbdf835075218d96f2e2 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:52:08 +0100 Subject: [PATCH 112/150] added error handling on recv_result --- include/ham/net/communicator_tcp.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 87baee7..9f5b4d6 100644 --- a/include/ham/net/communicator_tcp.hpp +++ 
b/include/ham/net/communicator_tcp.hpp @@ -415,8 +415,13 @@ class communicator { // : public std::enable_shared_from_this boost::asio::async_read(*peers[req.target_node].tcp_socket, boost::asio::buffer(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE), [this, &req](boost::system::error_code ec, size_t length) { - req.received_ = true; - HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, recv_result() completed " << req.target_node << std::endl; ) + if (!ec) + { + req.received_ = true; + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, recv_result() completed " << req.target_node << std::endl; ) + } else { + HAM_DEBUG( HAM_LOG << "THREAD: Async completion handler executed, failed to recv_result() from " << req.target_node << " Error: " << ec.message() << std::endl; ) + } } ); // MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); From 86e8bb7d7ab888cfecbe015713dfb1829e27f1ef Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 19:57:45 +0100 Subject: [PATCH 113/150] added error handling on recv_result --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 9f5b4d6..4a1ccf6 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -401,7 +401,7 @@ class communicator { // : public std::enable_shared_from_this // only to be used by request.send_result() template void send_result(node_t target_node, T* message, size_t size) { - + HAM_DEBUG( HAM_LOG << "communicator::send_result(): sending result to host" << std::endl; ) boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); } From d16d33629eaa8a5da38c3f33fcc6bed78eaafde3 Mon Sep 17 
00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 20:01:29 +0100 Subject: [PATCH 114/150] added debug output for send_result() --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 4a1ccf6..bfc4d88 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -401,7 +401,7 @@ class communicator { // : public std::enable_shared_from_this // only to be used by request.send_result() template void send_result(node_t target_node, T* message, size_t size) { - HAM_DEBUG( HAM_LOG << "communicator::send_result(): sending result to host" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << target_node << " sending result to host" << std::endl; ) boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); } From 268e817e7228f45e6a73e12fef0e8a86d9c8446e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 20:03:05 +0100 Subject: [PATCH 115/150] fixed send_result() target node --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index bfc4d88..482e3b7 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -402,7 +402,7 @@ class communicator { // : public std::enable_shared_from_this template void send_result(node_t target_node, T* message, size_t size) { HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << target_node << " sending result to host" << std::endl; ) - boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); + boost::asio::write(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)message, size)); } // host only -> async From 617252e061b263f9f7e31ef48c4ef30ef20880e6 Mon Sep 17 00:00:00 2001 From: 
Daniel Deppisch Date: Fri, 2 Nov 2018 20:04:11 +0100 Subject: [PATCH 116/150] fixed send_result() target node - properly --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 482e3b7..bfd1cd7 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -104,7 +104,7 @@ class communicator { // : public std::enable_shared_from_this // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else // MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); - communicator::instance().send_result(target_node, result_msg, size); + communicator::instance().send_result(source_node, result_msg, size); // don't need size * sizeof(T) because req.send_result is called as send_result((void*)&a, sizeof(a)) in offload_msg.hpp } @@ -402,7 +402,7 @@ class communicator { // : public std::enable_shared_from_this template void send_result(node_t target_node, T* message, size_t size) { HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << target_node << " sending result to host" << std::endl; ) - boost::asio::write(*peers[host_node_].tcp_socket, boost::asio::buffer((void*)message, size)); + boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); } // host only -> async From 9b935a631e48bffcccc32c93c633a76e8c3751b6 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 20:19:43 +0100 Subject: [PATCH 117/150] sending full MSG_SIZE for results --- include/ham/net/communicator_tcp.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index bfd1cd7..de11114 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -104,6 +104,8 @@ class communicator { 
// : public std::enable_shared_from_this // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else // MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + + communicator::instance().send_result(source_node, result_msg, size); // don't need size * sizeof(T) because req.send_result is called as send_result((void*)&a, sizeof(a)) in offload_msg.hpp } @@ -402,7 +404,10 @@ class communicator { // : public std::enable_shared_from_this template void send_result(node_t target_node, T* message, size_t size) { HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << target_node << " sending result to host" << std::endl; ) - boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer((void*)message, size)); + void* ptr; // ugly stuff to wrap result into MSG_SIZE buffer TODO(improvement): change to transfering only actual result size by using delimiter and read_until in recv_result() + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, constants::MSG_SIZE); + memcpy(ptr, message, size); + boost::asio::write(*peers[target_node].tcp_socket, boost::asio::buffer(ptr, constants::MSG_SIZE)); } // host only -> async From 126699ae6361225159efaa940d87ec1000b6160a Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 20:51:58 +0100 Subject: [PATCH 118/150] fixed tcp to use same copy protocol as MPI --- include/ham/offload/offload.hpp | 2 +- src/benchmark_ham_offload.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 58e7e19..334ec45 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -339,7 +339,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); // static_assert(false, "copy is not implemented yet for the SCIF back-end"); -#elif defined 
HAM_COMM_MPI +#elif defined(HAM_COMM_MPI) || defined(HAM_COMM_TCP) // send corresponding write and read messages to the sender and the receiver // issues a send operation on the source node, that sends the memory at source to the destination node diff --git a/src/benchmark_ham_offload.cpp b/src/benchmark_ham_offload.cpp index 3b56e3a..dabe62d 100644 --- a/src/benchmark_ham_offload.cpp +++ b/src/benchmark_ham_offload.cpp @@ -166,9 +166,9 @@ int main(int argc, char * argv[]) std::cout << "# COMM_MPI_RMA_DYNAMIC disabled" << std::endl; #endif #ifdef HAM_COMM_TCP - std::cout << "# COMM_TCP enabled" << std::endl; + std::cout << "# COMM_TCP enabled" << std::endl; #else - std::cout << "# COMM_TCP disabled" << std::endl; + std::cout << "# COMM_TCP disabled" << std::endl; #endif #ifdef HAM_COMM_SCIF std::cout << "# HAM_COMM_SCIF enabled" << std::endl; From ff4fd42ee40ba2b511001d7668ab9fc628e851d9 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 2 Nov 2018 21:04:02 +0100 Subject: [PATCH 119/150] unfixed tcp copy to not implemented... 
as originally intended --- include/ham/offload/offload.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 334ec45..7632a1b 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -329,17 +329,17 @@ future copy(buffer_ptr source, buffer_ptr dest, size_t n) } #endif -#ifndef HAM_COMM_ONE_SIDED // TODO(feature, high priority): implement +#if !defined(HAM_COMM_ONE_SIDED) || !defined(HAM_COMM_TCP)// TODO(feature, high priority): implement template void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) { net::communicator& comm = runtime::instance().communicator(); -#ifdef HAM_COMM_ONE_SIDED +#if defined(HAM_COMM_ONE_SIDED) || defined(HAM_COMM_TCP) // TODO(feature, high priority): implement // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); // static_assert(false, "copy is not implemented yet for the SCIF back-end"); -#elif defined(HAM_COMM_MPI) || defined(HAM_COMM_TCP) +#elif defined HAM_COMM_MPI // send corresponding write and read messages to the sender and the receiver // issues a send operation on the source node, that sends the memory at source to the destination node From 462bbbf7666bec93e83cef86be2aea774d02a12b Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:01:00 +0100 Subject: [PATCH 120/150] made sent/received completion flags volatile --- CMakeLists.txt | 2 +- include/ham/net/communicator_tcp.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c48af8..30f3dbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ else () endif () # tell the compiler to be strict -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -hstd=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAM_DEBUG_ON") add_subdirectory(thirdparty/bmt 
${CMAKE_CURRENT_BINARY_DIR}/build.noma_bmt) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index de11114..33332fd 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -126,8 +126,8 @@ class communicator { // : public std::enable_shared_from_this node_t target_node; node_t source_node; bool valid_; - bool received_; // used for the async receive handler to set to true, checked for completion - bool sent_; // used for the async send handler to set to true... unused, but the handler likes to do something + volatile bool received_; // used for the async receive handler to set to true, checked for completion + volatile bool sent_; // used for the async send handler to set to true... unused, but the handler likes to do something // only needed by the sender enum { NUM_REQUESTS = 3 }; @@ -403,7 +403,7 @@ class communicator { // : public std::enable_shared_from_this // only to be used by request.send_result() template void send_result(node_t target_node, T* message, size_t size) { - HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << target_node << " sending result to host" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_result(): node " << this_node_ << " sending result to node: " << target_node << std::endl; ) void* ptr; // ugly stuff to wrap result into MSG_SIZE buffer TODO(improvement): change to transfering only actual result size by using delimiter and read_until in recv_result() posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, constants::MSG_SIZE); memcpy(ptr, message, size); From b81187bbcae6990ebefce3c83124bf3f8a3e5a6a Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:39:20 +0100 Subject: [PATCH 121/150] proper asio connetion teardown --- include/ham/net/communicator_tcp.hpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp 
index 33332fd..3e43df2 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -311,15 +311,14 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - - std::thread thread([this](){ + work = boost::asio::make_work_guard(io_context); + thread = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) - boost::asio::io_service::work work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" << std::endl; ) } ); - thread.detach(); + // thread.detach(); no longer needed with member thread HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) } @@ -332,8 +331,10 @@ class communicator { // : public std::enable_shared_from_this { // finalize if(is_host()) { - io_context.stop(); + work.reset(); + thread.join(); } + io_context.stop(); HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) } @@ -517,7 +518,9 @@ class communicator { // : public std::enable_shared_from_this std::string host_port_; node_descriptor node_desc_dummy; boost::asio::io_service io_context; - + std::thread_ thread; + //boost::asio::io_service::work work; //1.65 syntax + boost::asio::executor_work_guard work; struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 09307f75c0d8e456e92659df7cb4ab7e68b1caa6 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:41:16 +0100 Subject: [PATCH 122/150] proper asio connetion teardown --- include/ham/net/communicator_tcp.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 3e43df2..7049661 100644 --- a/include/ham/net/communicator_tcp.hpp +++ 
b/include/ham/net/communicator_tcp.hpp @@ -311,8 +311,8 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - work = boost::asio::make_work_guard(io_context); - thread = std::thread([this](){ + work_ = boost::asio::make_work_guard(io_context); + thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" << std::endl; ) @@ -520,7 +520,7 @@ class communicator { // : public std::enable_shared_from_this boost::asio::io_service io_context; std::thread_ thread; //boost::asio::io_service::work work; //1.65 syntax - boost::asio::executor_work_guard work; + boost::asio::executor_work_guard work_; struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 2da089a6291fe0019c8f6ae82a2949aa005b847d Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:42:33 +0100 Subject: [PATCH 123/150] proper asio connetion teardown --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 7049661..09885ab 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -318,7 +318,7 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } ); - // thread.detach(); no longer needed with member thread + // thread_.detach(); no longer needed with member thread HAM_DEBUG( HAM_LOG << "communicator::communicator(): async thread started" << std::endl; ) } @@ -332,7 +332,7 @@ class communicator { // : public std::enable_shared_from_this // finalize if(is_host()) { work.reset(); - thread.join(); + thread_.join(); } io_context.stop(); HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) From e56004e8d0c5449275a19ededd92f208bb17a780 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:43:50 +0100 Subject: [PATCH 124/150] proper asio connetion teardown --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 09885ab..431b5be 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -518,7 +518,7 @@ class communicator { // : public std::enable_shared_from_this std::string host_port_; node_descriptor node_desc_dummy; boost::asio::io_service io_context; - std::thread_ thread; + std::thread thread_; //boost::asio::io_service::work work; //1.65 syntax boost::asio::executor_work_guard work_; struct tcp_peer { From fa9120ae7bffa07fa9a96aa74f14500789d7036f Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 21:44:40 +0100 Subject: [PATCH 125/150] proper asio connetion teardown --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 431b5be..5e8b51f 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -331,7 +331,7 @@ class communicator { // : public std::enable_shared_from_this { // finalize if(is_host()) { - work.reset(); + work_.reset(); thread_.join(); } io_context.stop(); From 1671a121eb8628813a5ab4bdbe3a93c9e836a997 Mon 
Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:18:00 +0100 Subject: [PATCH 126/150] changed work guard --- include/ham/net/communicator_tcp.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 5e8b51f..aaf3a8b 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -311,7 +311,7 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - work_ = boost::asio::make_work_guard(io_context); + work_ = boost::asio::io_service::work(io_context); thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) io_context.run(); @@ -519,9 +519,10 @@ class communicator { // : public std::enable_shared_from_this node_descriptor node_desc_dummy; boost::asio::io_service io_context; std::thread thread_; - //boost::asio::io_service::work work; //1.65 syntax - boost::asio::executor_work_guard work_; - struct tcp_peer { + boost::asio::io_service::work work_; //1.65 syntax + //boost::asio::executor_work_guard work_; // 1.66 syntax + + struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // needed by sender to manage which buffers are in use and which are free From 1adea8c9db6f3665b75df64a1ed09576ea8c54ab Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:31:10 +0100 Subject: [PATCH 127/150] changed work guard --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index aaf3a8b..017c5f0 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -311,9 +311,9 @@ 
class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - work_ = boost::asio::io_service::work(io_context); thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) + work_ = boost::asio::io_service::work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" << std::endl; ) } From 5431c8e91b9b3faaa83069984be23c4537ff1fbe Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:35:31 +0100 Subject: [PATCH 128/150] changed work guard --- include/ham/net/communicator_tcp.hpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 017c5f0..daad480 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -313,7 +313,7 @@ class communicator { // : public std::enable_shared_from_this // host runs io_context in separate thread (asynchronous progress thread) for async operations thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) - work_ = boost::asio::io_service::work(io_context); + boost::asio::io_service::work work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } @@ -330,12 +330,11 @@ class communicator { // : public std::enable_shared_from_this ~communicator() { // finalize - if(is_host()) { - work_.reset(); - thread_.join(); - } io_context.stop(); - HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) + if(is_host()) { + thread_.join(); + } + HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) } @@ -519,8 +518,6 @@ class communicator { // : public std::enable_shared_from_this node_descriptor node_desc_dummy; boost::asio::io_service io_context; std::thread thread_; - boost::asio::io_service::work work_; //1.65 syntax - //boost::asio::executor_work_guard work_; // 1.66 syntax struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 9b9d5d4ed669a0062351e03a2bb24b8ab03e4647 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:55:51 +0100 Subject: [PATCH 129/150] workaround to prevent tcp target from crashing due to connection closing before terminate functor is transmitted and executed --- include/ham/offload/offload.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 7632a1b..9a66c9d 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -197,6 +197,9 @@ void ping(node_t node, Functor&& func) HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg..." << std::endl; ) net::communicator::request req = comm.allocate_request(node); // TODO(improvement): resource deallocation of this request (currently only used for terminating) comm.send_msg(req, (void*)&msg, sizeof msg); +#if defined(HAM_COMM_TCP) + while(!req.sent()) // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed +#fi HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg done." 
<< std::endl; ) } From 940c8b7f68ecee6a6d24242d7b0e14ca6c1a443c Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:56:54 +0100 Subject: [PATCH 130/150] workaround to prevent tcp target from crashing due to connection closing before terminate functor is transmitted and executed --- include/ham/offload/offload.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 9a66c9d..43a8c98 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -199,7 +199,7 @@ void ping(node_t node, Functor&& func) comm.send_msg(req, (void*)&msg, sizeof msg); #if defined(HAM_COMM_TCP) while(!req.sent()) // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed -#fi +#endfi HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg done." << std::endl; ) } From 82dc863d97fb7748c54111adf4d6364689e68c74 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 22:57:27 +0100 Subject: [PATCH 131/150] workaround to prevent tcp target from crashing due to connection closing before terminate functor is transmitted and executed --- include/ham/offload/offload.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 43a8c98..6a01569 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -199,7 +199,7 @@ void ping(node_t node, Functor&& func) comm.send_msg(req, (void*)&msg, sizeof msg); #if defined(HAM_COMM_TCP) while(!req.sent()) // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed -#endfi +#endif HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg done." 
<< std::endl; ) } From 5c0ba9af595c249322572733e47ead69a65d73e4 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 23:25:58 +0100 Subject: [PATCH 132/150] workaround to prevent tcp target from crashing due to connection closing before terminate functor is transmitted and executed --- include/ham/offload/offload.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 6a01569..00db9f6 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -198,7 +198,7 @@ void ping(node_t node, Functor&& func) net::communicator::request req = comm.allocate_request(node); // TODO(improvement): resource deallocation of this request (currently only used for terminating) comm.send_msg(req, (void*)&msg, sizeof msg); #if defined(HAM_COMM_TCP) - while(!req.sent()) // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed + while(!req.sent()) {} // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed #endif HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg done." 
<< std::endl; ) } From 589fdd09fae220955c338d891473ecb5a63a7dfe Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 6 Nov 2018 23:45:08 +0100 Subject: [PATCH 133/150] workaround to prevent tcp target from crashing due to connection closing before terminate functor is transmitted and executed --- include/ham/net/communicator_tcp.hpp | 4 ++++ include/ham/offload/offload.hpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index daad480..2a98be9 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -123,6 +123,10 @@ class communicator { // : public std::enable_shared_from_this return sent_; } + void wait_sent() const { + while(!sent_); + } + node_t target_node; node_t source_node; bool valid_; diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 00db9f6..221693c 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -198,7 +198,7 @@ void ping(node_t node, Functor&& func) net::communicator::request req = comm.allocate_request(node); // TODO(improvement): resource deallocation of this request (currently only used for terminating) comm.send_msg(req, (void*)&msg, sizeof msg); #if defined(HAM_COMM_TCP) - while(!req.sent()) {} // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed + req.wait_sent(); // ugly workaround to prevent target from crashing because of connection teardown before the terminate functor is executed #endif HAM_DEBUG( HAM_LOG << "runtime::ping(): sending msg done." 
<< std::endl; ) } From 476857d9b5ab6a0ce230c6b7ea9a7e778dfd7de8 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:01:50 +0100 Subject: [PATCH 134/150] testing work guard --- include/ham/net/communicator_tcp.hpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 2a98be9..b7a15ed 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -124,7 +124,7 @@ class communicator { // : public std::enable_shared_from_this } void wait_sent() const { - while(!sent_); + while(!sent_) {}; } node_t target_node; @@ -315,9 +315,10 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - thread_ = std::thread([this](){ + boost::asio::io_service::work work(io_context); + thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) - boost::asio::io_service::work work(io_context); + // TODO(bug fix): need to figure out how to reset work from main thread so the background thread can return from run() before the host killst the io_context io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } @@ -333,7 +334,12 @@ class communicator { // : public std::enable_shared_from_this ~communicator() { - // finalize + // TODO(bug fix): what we actually want: + // stop the work guard, so the thread will return from io_context.run() when all outstanding ops completed + // join the thread so the host waits until above is done + // stop the context + // currently: have to kill the context first because otherwise the thread wont complete to be joined + // but this causes thread to abandon any outstanding ops io_context.stop(); if(is_host()) { thread_.join(); From 2939029536713d81182f9343796a76faac66a0a0 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:03:26 +0100 Subject: [PATCH 135/150] testing work guard --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index b7a15ed..1591948 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -316,7 +316,7 @@ class communicator { // : public std::enable_shared_from_this // host runs io_context in separate thread (asynchronous progress thread) for async operations boost::asio::io_service::work work(io_context); - thread_ = std::thread([this](){ + thread_ = std::thread([this, &work](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." 
<< std::endl; ) // TODO(bug fix): need to figure out how to reset work from main thread so the background thread can return from run() before the host killst the io_context io_context.run(); From 442bc34a96880c304901518631be564caebbbdbb Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:06:05 +0100 Subject: [PATCH 136/150] testing work guard --- include/ham/net/communicator_tcp.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 1591948..2d666a2 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -315,10 +315,10 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - boost::asio::io_service::work work(io_context); thread_ = std::thread([this, &work](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) // TODO(bug fix): need to figure out how to reset work from main thread so the background thread can return from run() before the host killst the io_context + work_ = boost::asio::io_service::work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } @@ -528,6 +528,7 @@ class communicator { // : public std::enable_shared_from_this node_descriptor node_desc_dummy; boost::asio::io_service io_context; std::thread thread_; + boost::asio::io_service::work work_; struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From dec53c8869cf6d447bae83279572be82a0e62a13 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:18:32 +0100 Subject: [PATCH 137/150] testing work guard --- include/ham/net/communicator_tcp.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 2d666a2..b99be8a 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -315,10 +315,10 @@ class communicator { // : public std::enable_shared_from_this HAM_DEBUG( HAM_LOG << "communicator::communicator(): initializing buffers done" << std::endl; ) // host runs io_context in separate thread (asynchronous progress thread) for async operations - thread_ = std::thread([this, &work](){ + thread_ = std::thread([this](){ HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Heyooo, I live." << std::endl; ) // TODO(bug fix): need to figure out how to reset work from main thread so the background thread can return from run() before the host killst the io_context - work_ = boost::asio::io_service::work(io_context); + boost::asio::io_service::work work(io_context); io_context.run(); HAM_DEBUG( HAM_LOG << "ASYNC THREAD: Oh noes, I'm dead!" 
<< std::endl; ) } From 9b8d7ca9acabaaec484761bcd0d12f1daee67038 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:20:00 +0100 Subject: [PATCH 138/150] testing work guard --- include/ham/net/communicator_tcp.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index b99be8a..ce7d4ba 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -528,7 +528,6 @@ class communicator { // : public std::enable_shared_from_this node_descriptor node_desc_dummy; boost::asio::io_service io_context; std::thread thread_; - boost::asio::io_service::work work_; struct tcp_peer { buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender From 426f10ab4db10832baa26c3c79f72614429a5d95 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:33:27 +0100 Subject: [PATCH 139/150] implemented copy --- include/ham/offload/offload.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 221693c..26d7d6a 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -332,12 +332,12 @@ future copy(buffer_ptr source, buffer_ptr dest, size_t n) } #endif -#if !defined(HAM_COMM_ONE_SIDED) || !defined(HAM_COMM_TCP)// TODO(feature, high priority): implement +#if !defined(HAM_COMM_ONE_SIDED)// TODO(feature, high priority): implement template void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) { net::communicator& comm = runtime::instance().communicator(); -#if defined(HAM_COMM_ONE_SIDED) || defined(HAM_COMM_TCP) +#if defined(HAM_COMM_ONE_SIDED) // TODO(feature, high priority): implement // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); @@ -364,6 +364,11 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) #elif defined HAM_COMM_MPI_RMA_DYNAMIC // use async copy + sync 
copy(source, dest, n).get(); +#elif defined HAM_COMM_TCP + void* ptr; + posix_memalign(&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + get_sync(source, ptr, n*sizeof(T)); + put_sync(ptr, dest,n*sizeof(T)); #endif } From c75e7d9a60af6170530c60c4c9ee60665a740e3f Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Wed, 7 Nov 2018 00:36:30 +0100 Subject: [PATCH 140/150] implemented copy --- include/ham/offload/offload.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index 26d7d6a..dafb6da 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -365,10 +365,10 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // use async copy + sync copy(source, dest, n).get(); #elif defined HAM_COMM_TCP - void* ptr; - posix_memalign(&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - get_sync(source, ptr, n*sizeof(T)); - put_sync(ptr, dest,n*sizeof(T)); + T* ptr; + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + get_sync(source, ptr, n); + put_sync(ptr, dest,n); #endif } From d3599d86ce6982a2134833f72372b59e3cf9482c Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Thu, 8 Nov 2018 15:37:16 +0100 Subject: [PATCH 141/150] change dynamic array init for compatibility with clang --- include/ham/net/communicator_tcp.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index ce7d4ba..5e53710 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -248,7 +248,10 @@ class communicator { // : public std::enable_shared_from_this temp_socks[l] = new tcp::socket(io_context); } - bool taken_ranks[nodes_] {false}; + bool taken_ranks[nodes_]; + for (int x = 0; x < nodex_; ++x) { + taken_ranks[x]= false; + } taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already 
taken (by the executing process) for(int i=1; i < nodes_; i++) { From 67f92365d5286f65644aa50758ed254604b16606 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Thu, 8 Nov 2018 15:38:15 +0100 Subject: [PATCH 142/150] change dynamic array init for compatibility with clang --- include/ham/net/communicator_tcp.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ham/net/communicator_tcp.hpp b/include/ham/net/communicator_tcp.hpp index 5e53710..888dfb7 100644 --- a/include/ham/net/communicator_tcp.hpp +++ b/include/ham/net/communicator_tcp.hpp @@ -249,7 +249,7 @@ class communicator { // : public std::enable_shared_from_this } bool taken_ranks[nodes_]; - for (int x = 0; x < nodex_; ++x) { + for (int x = 0; x < nodes_; ++x) { taken_ranks[x]= false; } taken_ranks[0] = true; // host rank has to be correctly provided and is therefore already taken (by the executing process) From e60f0dbe6c4b091c4369e62e3eaf7802791c8f15 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 9 Nov 2018 18:48:35 +0100 Subject: [PATCH 143/150] added streams --- CMakeLists.txt | 2 +- include/ham/misc/types.hpp | 1 + include/ham/offload/stream.hpp | 157 +++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 3 + src/ham/CMakeLists.txt | 2 + src/test_streams.cpp | 94 ++++++++++++++++++++ 6 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 include/ham/offload/stream.hpp create mode 100644 src/test_streams.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 30f3dbd..5c48af8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ else () endif () # tell the compiler to be strict -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -hstd=c++11") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAM_DEBUG_ON") add_subdirectory(thirdparty/bmt ${CMAKE_CURRENT_BINARY_DIR}/build.noma_bmt) diff --git a/include/ham/misc/types.hpp b/include/ham/misc/types.hpp index 1b8393d..cf4d7e7 100644 --- 
a/include/ham/misc/types.hpp +++ b/include/ham/misc/types.hpp @@ -13,6 +13,7 @@ namespace ham { typedef size_t node_t; // node type, e.g. MPI rank, identifies remote target process typedef size_t flag_t; // MPI RMA completion flag / buffer index +typedef char byte_t; typedef char* msg_buffer_t; // buffer type for messages namespace net { diff --git a/include/ham/offload/stream.hpp b/include/ham/offload/stream.hpp new file mode 100644 index 0000000..2a0f477 --- /dev/null +++ b/include/ham/offload/stream.hpp @@ -0,0 +1,157 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// something for "requires ham_offload_hpp" + +#ifndef ham_offload_stream_hpp +#define ham_offload_stream_hpp + +#include "ham/net/communicator.hpp" + +#include +#include + +#include "ham/misc/types.hpp" +#include "ham/functor/buffer.hpp" +#include "ham/offload/offload_msg.hpp" +#include "ham/offload/offload.hpp" +#include "ham/offload/runtime.hpp" +#include "ham/util/at_end_of_scope_do.hpp" +#include "ham/util/debug.hpp" +#include "ham/util/log.hpp" + + +namespace ham { +namespace offload { +namespace stream { + +using ::ham::net::buffer_ptr; +using ::ham::node_t; +using ::ham::byte_t; + +class ostream; + +class stream_base { +public: + stream_base(node_t target) : target_(target) {} + + stream_base(node_t target, buffer_ptr buffer, size_t size) : target_(target), buffer_(buffer), + size_(size) {} + // put common stuff of ostream/istream here + + buffer_ptr buffer() { return buffer_; } + + void buffer(buffer_ptr buffer) { buffer_ = buffer; } + + size_t size() { return size_; } + + void size(size_t size) { size_ = size; } + + node_t target() { return target_; } // no setting intended +protected: + node_t target_; + buffer_ptr buffer_; // remote sink, remote memory + size_t size_; // size of remote sink +}; + +class 
stream_proxy { + + friend class istream; + +public: + stream_proxy(); // default contstuctor needed for return transport dummy entries + stream_proxy(stream_base *stream) : target_(stream->target()), buffer_(stream->buffer()), + size_(stream->size()) {} + +private: + node_t target_; + buffer_ptr buffer_; + size_t size_; +}; + +class ostream : public stream_base, public std::ostringstream { + +public: + // always need the node associated with this stream + ostream(node_t target) : stream_base(target), std::ostringstream() {} + + ostream(node_t target, size_t size) : stream_base(target), std::ostringstream(), fixed_(true) { + posix_memalign((void **) &fixed_ptr_, constants::CACHE_LINE_SIZE, size); + rdbuf()->pubsetbuf(fixed_ptr_, size); + // NOTE: this does NOT set the streams buffer or size. It will only associate a buffer that should be large enough to not need resizing (user's responsibility) + // if it should not be large enough, it may still be resized/reallocated + } + + ~ostream() { + if (fixed_) std::free((void *) fixed_ptr_); + } + + const stream_proxy sync() { + std::string temp = rdbuf()->str(); // COPY ... no other option, direct pointers not accessible + if (ham::offload::is_host()) { // on host + buffer_ = offload::allocate(target_, temp.size()); + size_ = temp.size(); + offload::put_sync((byte_t *) temp.c_str(), buffer_, size_); + return stream_proxy(this); + } else { // on target + ham::net::communicator &comm = ham::offload::runtime::instance().communicator(); + buffer_ = comm.allocate_buffer((size_t) temp.size(), ham::offload::this_node()); + size_ = temp.size(); + strcpy((char *) buffer_.get(), + temp.c_str()); // COPY ... no other option, depending on backend we need the mem to be allocated by new_buffer + return stream_proxy(this); + } + } + + // we reduce the dynamic here + /* + - use like a local in-memory stream, i.e. 
stringstream, maybe inherit stringstream, or output version + - ss.str().data() and size() + - on explicit synchronisation request from user + - allocate remote memory, set internal butter_ptr with known size + - put() data onto target + */ +private: + bool fixed_ = false; + byte_t *fixed_ptr_ = nullptr; +}; + + +class istream : public stream_base, public std::istringstream { +public: + istream(const stream_proxy proxy) : stream_base(proxy.target_, proxy.buffer_, proxy.size_), + std::istringstream() { + if (ham::offload::is_host()) { + posix_memalign((void **) &local_ptr_, constants::CACHE_LINE_SIZE, size_); + offload::get_sync(buffer_, local_ptr_, size_); + this->rdbuf()->pubsetbuf(local_ptr_, size_); + } else { + rdbuf()->pubsetbuf(buffer_.get(), + size_); // avoid a copy that would be necessary when using str(string) to set the content + } + } + // fail on underflow, set flags/state whatever, check std::istream interface + + // maybe use stringstream and reconstruct from data_ + + ~istream() { + if (ham::offload::is_host()) { + offload::free(buffer_); + std::free((void *) local_ptr_); + } else { + ham::net::communicator &comm = ham::offload::runtime::instance().communicator(); + comm.free_buffer(buffer_); // this is where we trash "used" buffers on the targets + } + } + +private: + byte_t *local_ptr_ = nullptr; +}; + + +} // namespace stream +} +} // namespace ham +#endif // ham_offload_stream_hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8dbb21b..e32675b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,6 +71,9 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) + add_executable(test_streams_mpi test_streams.cpp) + target_link_libraries(test_streams_mpi ham_offload_mpi) + # RMA MPI add_executable(ham_offload_test_mpi_rma_dynamic ham_offload.cpp) diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index 1652e1c..30ae5cd 
100644 --- a/src/ham/CMakeLists.txt +++ b/src/ham/CMakeLists.txt @@ -18,6 +18,7 @@ set(HAM_LIB_SRC net/communicator_mpi_rma_dynamic.cpp offload/runtime.cpp offload/offload.cpp + offload/stream.cpp util/cpu_affinity.cpp) # TCP @@ -26,6 +27,7 @@ add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE net/communicator_tcp.cpp offload/runtime.cpp offload/offload.cpp + offload/stream.cpp offload/main.cpp util/cpu_affinity.cpp) target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) diff --git a/src/test_streams.cpp b/src/test_streams.cpp new file mode 100644 index 0000000..a5bfb5d --- /dev/null +++ b/src/test_streams.cpp @@ -0,0 +1,94 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/offload.hpp" +#include "ham/offload/stream.hpp" + +#include "cereal/archives/binary.hpp" + +#include +#include + +struct MyData { + + char one[1024]; + char two[1024]; + + template + void serialize(Archive & archive) + { + archive( one, two ); + } +}; + +// alternative: nicer, with proxy +// target +ham::offload::stream::stream_proxy offloaded_fun(ham::offload::stream::stream_proxy osp) +{ + ham::offload::stream::istream his(osp); // NOTE: data is already on the target + + MyData m1, m2, m3; + { + cereal::BinaryInputArchive iarchive(his); // Create an input archive + + iarchive(m1, m2, m3); // Read the data from the archive + } + + char* bla = "0123456789"; + strcpy(m1.one, bla); + strcpy(m1.two, bla); + char* blub = "ABCDEFGHI"; + strcpy(m2.one, blub); + strcpy(m2.two, blub); + strcpy(m2.one, bla); + strcpy(m2.two, blub); + + + ham::offload::stream::ostream hos(0); + + { + cereal::BinaryOutputArchive oarchive(hos); + oarchive(m1, m2, m3); + } + + auto out_proxy = hos.sync(); + + return out_proxy; +} + +int main(int argc, char* argv[]) +{ + ham::offload::node_t target = 1; + + 
ham::offload::stream::ostream hos(target); + + MyData m1, m2, m3; // could be out of scope, data to be transferred + + { + cereal::BinaryOutputArchive oarchive(hos); // Create an output archive + oarchive(m1, m2, m3); // Write the data to the archive + } // archive goes out of scope, ensuring all contents are flushed + // after this scope, data from oarchive is flushed into the stream, stream can be used + + auto out_proxy = hos.sync(); // trigger transfer to target (write has other meaning with streams) + + auto in_proxy = ham::offload::sync(target, f2f(&offloaded_fun, out_proxy)); + + ham::offload::stream::istream his(in_proxy); + + { + cereal::BinaryInputArchive iarchive(his); + iarchive(m1, m2, m3); + } + + printf("%.10s\n", m1.one); + printf("%.10s\n", m1.two); + printf("%.10s\n", m2.one); + printf("%.10s\n", m2.two); + printf("%.10s\n", m3.one); + printf("%.10s\n", m3.two); + return 0; +} + From f6c9f73cb4e3b586a7a97255959a284548cc9a3e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Fri, 9 Nov 2018 18:48:52 +0100 Subject: [PATCH 144/150] added streams --- src/ham/offload/stream.cpp | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 src/ham/offload/stream.cpp diff --git a/src/ham/offload/stream.cpp b/src/ham/offload/stream.cpp new file mode 100644 index 0000000..f33c7cf --- /dev/null +++ b/src/ham/offload/stream.cpp @@ -0,0 +1,6 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/offload/stream.hpp" From c4bdabb38ab63e2a9b2babfd6f4111ad3be15f08 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 10 Nov 2018 16:31:46 +0100 Subject: [PATCH 145/150] changed test to output result too --- include/ham/offload/stream.hpp | 32 +++++++++++++++++++++++--------- src/CMakeLists.txt | 24 ++++++++++++------------ src/ham/CMakeLists.txt | 30 +++++++++++++++--------------- src/test_argument_transfer.cpp | 2 +- 4 files changed, 51 insertions(+), 37 deletions(-) diff --git a/include/ham/offload/stream.hpp b/include/ham/offload/stream.hpp index 2a0f477..ace80f2 100644 --- a/include/ham/offload/stream.hpp +++ b/include/ham/offload/stream.hpp @@ -61,9 +61,11 @@ class stream_proxy { friend class istream; public: - stream_proxy(); // default contstuctor needed for return transport dummy entries + stream_proxy() = default; // default contstuctor needed for return transport dummy entries stream_proxy(stream_base *stream) : target_(stream->target()), buffer_(stream->buffer()), - size_(stream->size()) {} + size_(stream->size()) { + HAM_DEBUG( HAM_LOG << "stream_proxy::ctor() called" << std::endl; ) + } private: node_t target_; @@ -75,32 +77,40 @@ class ostream : public stream_base, public std::ostringstream { public: // always need the node associated with this stream - ostream(node_t target) : stream_base(target), std::ostringstream() {} + ostream(node_t target) : stream_base(target), std::ostringstream() { + HAM_DEBUG( HAM_LOG << "ostream::ctor() called" << std::endl; ) + } ostream(node_t target, size_t size) : stream_base(target), std::ostringstream(), fixed_(true) { posix_memalign((void **) &fixed_ptr_, constants::CACHE_LINE_SIZE, size); rdbuf()->pubsetbuf(fixed_ptr_, size); + HAM_DEBUG( HAM_LOG << "ostream::ctor() for provided buffer size called" << std::endl; ) // NOTE: this does NOT set the streams buffer or size. 
It will only associate a buffer that should be large enough to not need resizing (user's responsibility) // if it should not be large enough, it may still be resized/reallocated } ~ostream() { if (fixed_) std::free((void *) fixed_ptr_); + HAM_DEBUG( HAM_LOG << "ostream::dtor()" << std::endl; ) } const stream_proxy sync() { std::string temp = rdbuf()->str(); // COPY ... no other option, direct pointers not accessible if (ham::offload::is_host()) { // on host - buffer_ = offload::allocate(target_, temp.size()); + HAM_DEBUG( HAM_LOG << "host executing ostream::sync()" << std::endl; ) size_ = temp.size(); + buffer_ = offload::allocate(target_, size_); + HAM_DEBUG( HAM_LOG << "ostream::sync() allocated buffer @" << target_ << std::endl; ) offload::put_sync((byte_t *) temp.c_str(), buffer_, size_); + HAM_DEBUG( HAM_LOG << "ostream::sync() sent data to " << target_ << std::endl; ) return stream_proxy(this); } else { // on target + HAM_DEBUG( HAM_LOG << "target executing ostream::sync()" << std::endl; ) ham::net::communicator &comm = ham::offload::runtime::instance().communicator(); - buffer_ = comm.allocate_buffer((size_t) temp.size(), ham::offload::this_node()); size_ = temp.size(); - strcpy((char *) buffer_.get(), - temp.c_str()); // COPY ... no other option, depending on backend we need the mem to be allocated by new_buffer + buffer_ = comm.allocate_buffer(size_, ham::offload::this_node()); + HAM_DEBUG( HAM_LOG << "ostream::sync() allocated local buffer" << std::endl; ) + strcpy((char *) buffer_.get(), temp.c_str()); // COPY ... 
no other option, depending on backend we need the mem to be allocated by new_buffer return stream_proxy(this); } } @@ -123,13 +133,15 @@ class istream : public stream_base, public std::istringstream { public: istream(const stream_proxy proxy) : stream_base(proxy.target_, proxy.buffer_, proxy.size_), std::istringstream() { + HAM_DEBUG( HAM_LOG << "istream::ctor() called with stream_proxy" << target_ << std::endl; ) if (ham::offload::is_host()) { posix_memalign((void **) &local_ptr_, constants::CACHE_LINE_SIZE, size_); offload::get_sync(buffer_, local_ptr_, size_); + HAM_DEBUG( HAM_LOG << "istream::sync() host retrieved data from " << buffer_.node() << std::endl; ) this->rdbuf()->pubsetbuf(local_ptr_, size_); } else { - rdbuf()->pubsetbuf(buffer_.get(), - size_); // avoid a copy that would be necessary when using str(string) to set the content + rdbuf()->pubsetbuf(buffer_.get(), size_); // avoid a copy that would be necessary when using str(string) to set the content + HAM_DEBUG( HAM_LOG << "istream::sync() target set streambuffer to remote buffer" << target_ << std::endl; ) } } // fail on underflow, set flags/state whatever, check std::istream interface @@ -139,10 +151,12 @@ class istream : public stream_base, public std::istringstream { ~istream() { if (ham::offload::is_host()) { offload::free(buffer_); + HAM_DEBUG( HAM_LOG << "istream::dtor() freed memory @" << target_ << std::endl; ) std::free((void *) local_ptr_); } else { ham::net::communicator &comm = ham::offload::runtime::instance().communicator(); comm.free_buffer(buffer_); // this is where we trash "used" buffers on the targets + HAM_DEBUG( HAM_LOG << "istream::dtor() freed local memory" << std::endl; ) } } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e32675b..4a2dda3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,8 +13,8 @@ add_subdirectory(ham) ## Explicit targets (not built by default) # TCP benchmarks -add_executable(benchmark_ham_offload_tcp benchmark_ham_offload.cpp) 
-target_link_libraries(benchmark_ham_offload_tcp ham_offload_tcp) +# add_executable(benchmark_ham_offload_tcp benchmark_ham_offload.cpp) +# target_link_libraries(benchmark_ham_offload_tcp ham_offload_tcp) # Intel LEO offload directive benchmark, requires Intel compiler if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") @@ -42,17 +42,17 @@ add_executable(active_msgs active_msgs.cpp) target_link_libraries(active_msgs ham_interface) # TCP tests -add_executable(ham_offload_test_tcp ham_offload.cpp) -target_link_libraries(ham_offload_test_tcp ham_offload_tcp) +# add_executable(ham_offload_test_tcp ham_offload.cpp) +# target_link_libraries(ham_offload_test_tcp ham_offload_tcp) -add_executable(inner_product_tcp inner_product.cpp) -target_link_libraries(inner_product_tcp ham_offload_tcp) +# add_executable(inner_product_tcp inner_product.cpp) +# target_link_libraries(inner_product_tcp ham_offload_tcp) -add_executable(test_data_transfer_tcp test_data_transfer.cpp) -target_link_libraries(test_data_transfer_tcp ham_offload_tcp) +# add_executable(test_data_transfer_tcp test_data_transfer.cpp) +# target_link_libraries(test_data_transfer_tcp ham_offload_tcp) -add_executable(test_argument_transfer_tcp test_argument_transfer.cpp) -target_link_libraries(test_argument_transfer_tcp ham_offload_tcp) +# add_executable(test_argument_transfer_tcp test_argument_transfer.cpp) +# target_link_libraries(test_argument_transfer_tcp ham_offload_tcp) if (MPI_FOUND) # two-sided MPI @@ -71,8 +71,8 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) - add_executable(test_streams_mpi test_streams.cpp) - target_link_libraries(test_streams_mpi ham_offload_mpi) +# add_executable(test_streams_mpi test_streams.cpp) +# target_link_libraries(test_streams_mpi ham_offload_mpi) # RMA MPI diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index 30ae5cd..33a736c 100644 --- a/src/ham/CMakeLists.txt +++ 
b/src/ham/CMakeLists.txt @@ -18,25 +18,25 @@ set(HAM_LIB_SRC net/communicator_mpi_rma_dynamic.cpp offload/runtime.cpp offload/offload.cpp - offload/stream.cpp +# offload/stream.cpp util/cpu_affinity.cpp) # TCP -add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE - net/communicator.cpp - net/communicator_tcp.cpp - offload/runtime.cpp - offload/offload.cpp - offload/stream.cpp - offload/main.cpp - util/cpu_affinity.cpp) -target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) -target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library pthread) +#add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE +# net/communicator.cpp +# net/communicator_tcp.cpp +# offload/runtime.cpp +# offload/offload.cpp +# offload/stream.cpp +# offload/main.cpp +# util/cpu_affinity.cpp) +#target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) +#target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library pthread) -set_target_properties(ham_offload_tcp PROPERTIES - CXX_STANDARD 11 - CXX_STANDARD_REQUIRED YES - CXX_EXTENSIONS NO) +#set_target_properties(ham_offload_tcp PROPERTIES +# CXX_STANDARD 11 +# CXX_STANDARD_REQUIRED YES +# CXX_EXTENSIONS NO) if (MPI_FOUND) add_library(ham_offload_mpi # SHARED if BUILD_SHARED_LIBS = TRUE diff --git a/src/test_argument_transfer.cpp b/src/test_argument_transfer.cpp index 97a693e..7712459 100644 --- a/src/test_argument_transfer.cpp +++ b/src/test_argument_transfer.cpp @@ -23,7 +23,7 @@ bool test_type_invokation(offload::node_t target, T arg) { T result = offload::sync(target, f2f(&type_transfer_function, arg)); bool passed = result == arg; - std::cout << "Result for type \"" << typeid(T).name() << "\": " << (passed ? "pass" : "fail") << std::endl; + std::cout << "Result for type \"" << typeid(T).name() << "\": " << arg << (passed ? 
" -> pass" : " -> fail") << std::endl; return passed; } From 96585b08b5ada7fbe65cf9560869d82d1a0b442e Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Tue, 13 Nov 2018 13:32:25 +0100 Subject: [PATCH 146/150] fix migratable use --- include/ham/misc/types.hpp | 4 +++- tools/install_boost.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/ham/misc/types.hpp b/include/ham/misc/types.hpp index cf4d7e7..f50fa0e 100644 --- a/include/ham/misc/types.hpp +++ b/include/ham/misc/types.hpp @@ -9,6 +9,8 @@ #include #include +#include "ham/misc/migratable.hpp" + namespace ham { typedef size_t node_t; // node type, e.g. MPI rank, identifies remote target process @@ -31,7 +33,7 @@ class result_container T get() { return T(std::move(res)); } private: - T res; + migratable res; }; template<> diff --git a/tools/install_boost.sh b/tools/install_boost.sh index 2a19297..e422feb 100755 --- a/tools/install_boost.sh +++ b/tools/install_boost.sh @@ -40,7 +40,7 @@ BASHRC_FILE=$HOME/dev/null # set to /dev/null to disable, or to any other file t BOOST_BUILD_OPTIONS="-j8" # concurrent build with up to 8 commands BOOST_NAME=boost -BOOST_VERSION=1_65_1 +BOOST_VERSION=1_66_0 BOOST_MIC_SUFFIX=mic BOOST_ARCHIVE=${BOOST_NAME}_${BOOST_VERSION} # NOTE: without tar.bz2 From 657302ee36bac1bfab8bfcd65f0e463f150eb523 Mon Sep 17 00:00:00 2001 From: Phuzzyhead Date: Wed, 14 Nov 2018 02:09:45 +0100 Subject: [PATCH 147/150] fixed streams --- CMakeLists.txt | 2 +- include/ham/offload/stream.hpp | 14 ++++----- src/CMakeLists.txt | 4 +-- src/ham/CMakeLists.txt | 2 +- src/test_streams.cpp | 55 ++++++++++++++++++++++++++-------- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c48af8..30f3dbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,7 +41,7 @@ else () endif () # tell the compiler to be strict -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -hstd=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAM_DEBUG_ON") add_subdirectory(thirdparty/bmt ${CMAKE_CURRENT_BINARY_DIR}/build.noma_bmt) diff --git a/include/ham/offload/stream.hpp b/include/ham/offload/stream.hpp index ace80f2..8225f14 100644 --- a/include/ham/offload/stream.hpp +++ b/include/ham/offload/stream.hpp @@ -100,17 +100,17 @@ class ostream : public stream_base, public std::ostringstream { HAM_DEBUG( HAM_LOG << "host executing ostream::sync()" << std::endl; ) size_ = temp.size(); buffer_ = offload::allocate(target_, size_); - HAM_DEBUG( HAM_LOG << "ostream::sync() allocated buffer @" << target_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "ostream::sync() allocated buffer @" << target_ << " size: " << size_ << std::endl; ) offload::put_sync((byte_t *) temp.c_str(), buffer_, size_); - HAM_DEBUG( HAM_LOG << "ostream::sync() sent data to " << target_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "ostream::sync() sent data to " << target_ << " size: " << size_ << std::endl; ) return stream_proxy(this); } else { // on target HAM_DEBUG( HAM_LOG << "target executing ostream::sync()" << std::endl; ) ham::net::communicator &comm = ham::offload::runtime::instance().communicator(); size_ = temp.size(); buffer_ = comm.allocate_buffer(size_, ham::offload::this_node()); - HAM_DEBUG( HAM_LOG << "ostream::sync() allocated local buffer" << std::endl; ) - strcpy((char *) buffer_.get(), temp.c_str()); // COPY ... no other option, depending on backend we need the mem to be allocated by new_buffer + HAM_DEBUG( HAM_LOG << "ostream::sync() allocated local buffer size: " << size_ << std::endl; ) + memcpy((char *) buffer_.get(), temp.c_str(), size_); // COPY ... 
no other option, depending on backend we need the mem to be allocated by new_buffer return stream_proxy(this); } } @@ -133,15 +133,15 @@ class istream : public stream_base, public std::istringstream { public: istream(const stream_proxy proxy) : stream_base(proxy.target_, proxy.buffer_, proxy.size_), std::istringstream() { - HAM_DEBUG( HAM_LOG << "istream::ctor() called with stream_proxy" << target_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "istream::ctor() called with stream_proxy from: " << target_ << std::endl; ) if (ham::offload::is_host()) { posix_memalign((void **) &local_ptr_, constants::CACHE_LINE_SIZE, size_); offload::get_sync(buffer_, local_ptr_, size_); - HAM_DEBUG( HAM_LOG << "istream::sync() host retrieved data from " << buffer_.node() << std::endl; ) + HAM_DEBUG( HAM_LOG << "istream::ctor() host retrieved data from " << buffer_.node() << " size: " << size_ << std::endl; ) this->rdbuf()->pubsetbuf(local_ptr_, size_); } else { rdbuf()->pubsetbuf(buffer_.get(), size_); // avoid a copy that would be necessary when using str(string) to set the content - HAM_DEBUG( HAM_LOG << "istream::sync() target set streambuffer to remote buffer" << target_ << std::endl; ) + HAM_DEBUG( HAM_LOG << "istream::ctor() target set streambuffer to remote buffer" << target_ << " size: " << size_ << std::endl; ) } } // fail on underflow, set flags/state whatever, check std::istream interface diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4a2dda3..f5dcdd7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -71,8 +71,8 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) -# add_executable(test_streams_mpi test_streams.cpp) -# target_link_libraries(test_streams_mpi ham_offload_mpi) + add_executable(test_streams_mpi test_streams.cpp) + target_link_libraries(test_streams_mpi ham_offload_mpi) # RMA MPI diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index 
33a736c..8108980 100644 --- a/src/ham/CMakeLists.txt +++ b/src/ham/CMakeLists.txt @@ -18,7 +18,7 @@ set(HAM_LIB_SRC net/communicator_mpi_rma_dynamic.cpp offload/runtime.cpp offload/offload.cpp -# offload/stream.cpp + offload/stream.cpp util/cpu_affinity.cpp) # TCP diff --git a/src/test_streams.cpp b/src/test_streams.cpp index a5bfb5d..248e52a 100644 --- a/src/test_streams.cpp +++ b/src/test_streams.cpp @@ -19,7 +19,7 @@ struct MyData { template void serialize(Archive & archive) { - archive( one, two ); + archive( cereal::binary_data( one, sizeof(char)*1024), cereal::binary_data( two, sizeof(char)*1024)); } }; @@ -28,7 +28,7 @@ struct MyData { ham::offload::stream::stream_proxy offloaded_fun(ham::offload::stream::stream_proxy osp) { ham::offload::stream::istream his(osp); // NOTE: data is already on the target - + MyData m1, m2, m3; { cereal::BinaryInputArchive iarchive(his); // Create an input archive @@ -36,17 +36,31 @@ ham::offload::stream::stream_proxy offloaded_fun(ham::offload::stream::stream_pr iarchive(m1, m2, m3); // Read the data from the archive } + printf("tin: 1.1 %.10s\n", m1.one); + printf("tin: 1.2 %.10s\n", m1.two); + printf("tin: 2.1 %.10s\n", m2.one); + printf("tin: 2.2 %.10s\n", m2.two); + printf("tin: 3.1 %.10s\n", m3.one); + printf("tin: 3.2 %.10s\n", m3.two); + char* bla = "0123456789"; strcpy(m1.one, bla); strcpy(m1.two, bla); char* blub = "ABCDEFGHI"; strcpy(m2.one, blub); strcpy(m2.two, blub); - strcpy(m2.one, bla); - strcpy(m2.two, blub); + strcpy(m3.one, bla); + strcpy(m3.two, blub); + printf("tout: 1.1 %.10s\n", m1.one); + printf("tout: 1.2 %.10s\n", m1.two); + printf("tout: 2.1 %.10s\n", m2.one); + printf("tout: 2.2 %.10s\n", m2.two); + printf("tout: 3.1 %.10s\n", m3.one); + printf("tout: 3.2 %.10s\n", m3.two); ham::offload::stream::ostream hos(0); + { cereal::BinaryOutputArchive oarchive(hos); @@ -63,9 +77,26 @@ int main(int argc, char* argv[]) ham::offload::node_t target = 1; ham::offload::stream::ostream hos(target); + MyData m1, m2, 
m3; // could be out of scope, data to be transferred + char* bla = "9876543210"; + strcpy(m1.one, bla); + strcpy(m1.two, bla); + char* blub = "IHGFEDCBA"; + strcpy(m2.one, blub); + strcpy(m2.two, blub); + strcpy(m3.one, bla); + strcpy(m3.two, blub); + + printf("hout: 1.1 %.10s\n", m1.one); + printf("hout: 1.2 %.10s\n", m1.two); + printf("hout: 2.1 %.10s\n", m2.one); + printf("hout: 2.2 %.10s\n", m2.two); + printf("hout: 3.1 %.10s\n", m3.one); + printf("hout: 3.2 %.10s\n", m3.two); + { cereal::BinaryOutputArchive oarchive(hos); // Create an output archive oarchive(m1, m2, m3); // Write the data to the archive @@ -76,19 +107,19 @@ int main(int argc, char* argv[]) auto in_proxy = ham::offload::sync(target, f2f(&offloaded_fun, out_proxy)); - ham::offload::stream::istream his(in_proxy); + ham::offload::stream::istream his(in_proxy); + { cereal::BinaryInputArchive iarchive(his); iarchive(m1, m2, m3); } - - printf("%.10s\n", m1.one); - printf("%.10s\n", m1.two); - printf("%.10s\n", m2.one); - printf("%.10s\n", m2.two); - printf("%.10s\n", m3.one); - printf("%.10s\n", m3.two); + printf("hin: 1.1 %.10s\n", m1.one); + printf("hin: 1.2 %.10s\n", m1.two); + printf("hin: 2.1 %.10s\n", m2.one); + printf("hin: 2.2 %.10s\n", m2.two); + printf("hin: 3.1 %.10s\n", m3.one); + printf("hin: 3.2 %.10s\n", m3.two); return 0; } From 3206fffe3406ea9a38d9d7aab20ec9aedc5de276 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 8 Jun 2019 14:38:23 +0200 Subject: [PATCH 148/150] cleanup --- ...communicator_mpi_rma_dynamic_data_only.hpp | 400 ++++++++++++++++++ 1 file changed, 400 insertions(+) create mode 100644 include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp b/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp new file mode 100644 index 0000000..4dff738 --- /dev/null +++ b/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp @@ -0,0 +1,400 @@ +// Copyright (c) 2013-2014 Matthias Noack 
(ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef ham_net_communicator_mpi_rma_dynamic_hpp +#define ham_net_communicator_mpi_rma_dynamic_hpp + +#include + +#include +#include // memcpy +#include // posix_memalign + +#include "ham/misc/constants.hpp" +#include "ham/misc/resource_pool.hpp" +#include "ham/misc/types.hpp" +#include "ham/util/debug.hpp" +#include "ham/util/log.hpp" + +namespace ham { +namespace net { + +template +class buffer_ptr { +public: + buffer_ptr(); + buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node), mpi_address_(0) { } + buffer_ptr(T* ptr, node_t node, MPI_Aint mpi_address) : ptr_(ptr), node_(node), mpi_address_(mpi_address) { } + + + T* get() { return ptr_; } + node_t node() { return node_; } + MPI_Aint get_mpi_address() { return mpi_address_; } + + // element access + T& operator [] (size_t i); + + // basic pointer arithmetic to address sub-buffers + buffer_ptr operator+(size_t off) + { + return buffer_ptr(ptr_ + off, node_); + } + +private: + T* ptr_; + node_t node_; + MPI_Aint mpi_address_; +}; + +class node_descriptor +{ +public: + //node_descriptor() : name(MPI_MAX_PROCESSOR_NAME, 0) {} + + //const std::string& name() const { return name_; } + const char* name() const { return name_; } +private: + //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable + char name_[MPI_MAX_PROCESSOR_NAME + 1]; + + friend class net::communicator; +}; + +class communicator { +public: + // externally used interface of request must be shared across all communicator-implementations + class request { + public: + request() : valid_(false) {} // instantiate invalid + + request(node_t target_node, node_t source_node, size_t send_buffer_index, size_t recv_buffer_index) + : target_node(target_node), source_node(source_node), valid_(true), 
send_buffer_index(send_buffer_index), recv_buffer_index(recv_buffer_index), req_count(0), uses_rma_(false) + {} + + // return true if request was finished + // will not work as intended for rma ops, no equivalent to test() available for remote completion + bool test() + { + int flag = 0; + MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received + + if(uses_rma_) + { + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) + } + + return flag != 0; + } + + void* get() // blocks + { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + if(uses_rma_) + { + MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_win); + } + return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); + } + + template + void send_result(T* result_msg, size_t size) + { + assert(communicator::this_node() == target_node); // this assert fails if send_result is called from the wrong side + + // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else + MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); + } + + bool valid() const + { + return valid_; + } + + bool uses_rma() const + { + return uses_rma_; + } + + MPI_Request& next_mpi_request() + { + HAM_DEBUG( HAM_LOG << "next_mpi_request(): this=" << this << ", req_count=" << req_count << ", NUM_REQUESTS=" << NUM_REQUESTS << std::endl; ) + assert(req_count < NUM_REQUESTS); + return mpi_reqs[req_count++]; // NOTE: post-increment + } + + node_t target_node; + node_t source_node; + bool valid_; + bool uses_rma_; + + 
// only needed by the sender + enum { NUM_REQUESTS = 3 }; + + size_t send_buffer_index; // buffer to use for sending the message + size_t recv_buffer_index; // buffer to use for receiving the result + size_t req_count; + + private: + MPI_Request mpi_reqs[NUM_REQUESTS]; // for sending the msg, receiving the result, and an associated data transfer + }; // class request + + typedef request& request_reference_type; + typedef const request& request_const_reference_type; + + communicator(int argc, char* argv[]) + { + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI" << std::endl; ) + + instance_ = this; + int p; + MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &p); + if (p != MPI_THREAD_MULTIPLE) + { + std::cerr << "Could not initialise MPI with MPI_THREAD_MULTIPLE, MPI_Init_thread() returned " << p << std::endl; + } + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI ..." << std::endl; ) + + int t; + MPI_Comm_rank(MPI_COMM_WORLD, &t); + this_node_ = t; + MPI_Comm_size(MPI_COMM_WORLD, &t); + nodes_ = t; + host_node_ = 0; // TODO(improvement): make configureable, like for SCIF + + HAM_DEBUG( std::cout << "communicator::communicator(): initialising MPI done" << std::endl; ) + + peers = new mpi_peer[nodes_]; + + // start of node descriptor code: + node_descriptions.resize(nodes_); + + // build own node descriptor + node_descriptor node_description; + int count; + MPI_Get_processor_name(node_description.name_, &count); + node_description.name_[count] = 0x0; // null terminate + + // communicate descriptors between nodes + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions" << std::endl; ) + MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); + HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) + + + if (is_host()) { + + for (node_t i = 1; i < nodes_; ++i) 
{ // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for (size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j - 1); + } + } + } + + // initialise 1 global window per target for data + for (node_t i = 1; i < nodes_; ++i) { + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); + } + + // get all locks to targets + // targets lock to other targets for copies + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + if(i != this_node_) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks because all ranks lock on every target concurrently + } + } + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) + + } + + ~communicator() + { + MPI_Finalize(); // TODO(improvement): check on error and create output if there was one + HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) + } + + + request allocate_request(node_t remote_node) + { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + + const size_t send_buffer_index = peers[remote_node].buffer_pool.allocate(); + const size_t recv_buffer_index = peers[remote_node].buffer_pool.allocate(); + + return { remote_node, this_node_, send_buffer_index, recv_buffer_index }; + } + + void free_request(request& req) + { + assert(req.valid()); + assert(req.source_node == this_node_); + + mpi_peer& peer = peers[req.target_node]; + + peer.buffer_pool.free(req.send_buffer_index); + peer.buffer_pool.free(req.recv_buffer_index); + req.valid_ = false; + } + +public: + void send_msg(request_reference_type req, void* msg, size_t size) + { + // copy message from caller into transfer buffer + void* msg_buffer = 
static_cast(&peers[req.target_node].msg_buffers[req.send_buffer_index]); + memcpy(msg_buffer, msg, size); + MPI_Isend(msg_buffer, size, MPI_BYTE, req.target_node, constants::DEFAULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + } + + // to be used by the offload target's main loop: synchronously receive one message at a time + // NOTE: the local static receive buffer! + void* recv_msg_host(void* msg = nullptr, size_t size = constants::MSG_SIZE) + { + static msg_buffer buffer; // NOTE ! + MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + return static_cast(&buffer); + } + + // trigger receiving the result of a message on the sending side + void recv_result(request_reference_type req) + { + // nothing todo here, since this communicator implementation uses one-sided communication + // the data is already where it is expected (in the buffer referenced in req) + MPI_Irecv(static_cast(&peers[req.target_node].msg_buffers[req.recv_buffer_index]), constants::MSG_SIZE, MPI_BYTE, req.target_node, constants::RESULT_TAG, MPI_COMM_WORLD, &req.next_mpi_request()); + return; + } + + // in MPI RMA backend only used by copy + // host uses async version + // targets don't send data to host as host uses rma get + template + void send_data(T* local_source, buffer_ptr remote_dest, size_t size) + { + // execute transfer + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); + } + + // to be used by the host only + template + void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) + { + req.uses_rma_ = true; + + // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), 
remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); + } + + // not used in MPI RMA backend + // host uses async version + // targets don't use get + // should be safe to remove + template + void recv_data(buffer_ptr remote_source, T* local_dest, size_t size) + { + MPI_Get(remote_source, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win); + MPI_Win_flush(remote_source.node(), peers[remote_source.node()].rma_win); + } + + // to be used by the host + template + void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) + { + req.uses_rma_ = true; + + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); + } + + template + buffer_ptr allocate_buffer(const size_t n, node_t source_node) + { + T* ptr; + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // attach to own window + MPI_Win_attach(peers[this_node_].rma_win, (void*)ptr, n * sizeof(T)); + MPI_Aint mpi_address; + MPI_Get_address((void*)ptr, &mpi_address); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_, mpi_address); + } + + // for host to allocate peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers + template + buffer_ptr allocate_peer_buffer(const size_t n, node_t source_node) + { + T* ptr; + posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); + // NOTE: no ctor is called + return buffer_ptr(ptr, this_node_); + } + + template + void free_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + // remove from own rma window + MPI_Win_detach(peers[this_node_].rma_win, ptr.get()); + 
free(static_cast(ptr.get())); + } + + // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers + template + void free_peer_buffer(buffer_ptr ptr) + { + assert(ptr.node() == this_node_); + // NOTE: no dtor is called + free(static_cast(ptr.get())); + } + + static communicator& instance() { return *instance_; } + static node_t this_node() { return instance().this_node_; } + static size_t num_nodes() { return instance().nodes_; } + bool is_host() { return this_node_ == 0; } // TODO(improvement): ham_address == ham_host_address ; } + bool is_host(node_t node) { return node == 0; } // TODO(improvement): node == ham_host_address; } + + static const node_descriptor& get_node_description(node_t node) + { + return instance().node_descriptions[node]; + } + +private: + static communicator* instance_; + node_t this_node_; + size_t nodes_; + node_t host_node_; + std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions + + struct mpi_peer { + buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender + + // needed by sender to manage which buffers are in use and which are free + // just manages indices, that can be used by + detail::resource_pool buffer_pool; + + // mpi rma dynamic window for data transfers + MPI_Win rma_win; + }; + + mpi_peer* peers; +}; + +template +buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } + +template +T& buffer_ptr::operator[](size_t i) +{ + assert(node_ == communicator::this_node()); + return ptr_[i]; +} + +} // namespace net +} // namespace ham + +#endif // ham_net_communicator_mpi_hpp From fc47b02a9f4c396be707c9c9142bdc081e859055 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 8 Jun 2019 16:41:42 +0200 Subject: [PATCH 149/150] cleanup --- src/benchmark_streams.cpp | 168 ++++++++++++++++++ ...communicator_mpi_rma_dynamic_data_only.cpp | 9 + 2 
files changed, 177 insertions(+) create mode 100644 src/benchmark_streams.cpp create mode 100644 src/ham/net/communicator_mpi_rma_dynamic_data_only.cpp diff --git a/src/benchmark_streams.cpp b/src/benchmark_streams.cpp new file mode 100644 index 0000000..049c4b8 --- /dev/null +++ b/src/benchmark_streams.cpp @@ -0,0 +1,168 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/offload.hpp" +#include "ham/offload/stream.hpp" + +#include + +#include "cereal/archives/binary.hpp" + +#include "ham/util/time.hpp" + +#include +#include + +using namespace std; +using namespace ham::util::time; +using namespace ham; + +// this is set in main. locally for host, through offload for target +// it is ugly, but is used to remove the allocation of the user-buffer from the benchmarked time, +// because we want to measure the overhead of the streaming abstraction, not how long it takes to instantiate user data +class cheese { +public: + static char* d1; + static size_t cheese_size; +}; +char* cheese::d1 = nullptr; +size_t cheese::cheese_size = 0; + +void set_cheese(size_t size) { + posix_memalign((void**)&cheese::d1, constants::CACHE_LINE_SIZE, size); + cheese::cheese_size = size; +} + +ham::offload::stream::stream_proxy offloaded_fun(ham::offload::stream::stream_proxy osp) +{ + ham::offload::stream::istream his(osp); + + { + cereal::BinaryInputArchive iarchive(his); + + iarchive(cereal::binary_data(cheese::d1, sizeof(char)*cheese::cheese_size)); + } + + //if(cheese::d1[1337] == 'a') cheese::d1[1337] = 'b'; + ham::offload::stream::ostream hos(0, cheese::cheese_size); + { + cereal::BinaryOutputArchive oarchive(hos); + oarchive(cereal::binary_data(cheese::d1, sizeof(char)*cheese::cheese_size)); + } + auto out_proxy = hos.sync(); + return out_proxy; +} + +int main(int argc, char* argv[]) +{ + // 
option defaults + unsigned int warmup_runs = 1; + unsigned int runs = 1000; + size_t data_size = 1024*1024; + + // command line options + boost::program_options::options_description desc("Supported options"); + desc.add_options() + ("help,h", "Shows this message") + ("runs,r", boost::program_options::value(&runs)->default_value(runs), "number of identical inner runs for which the average time will be computed") + ("warmup-runs", boost::program_options::value(&warmup_runs)->default_value(warmup_runs), "number of number of additional warmup runs before times are measured") + ("size,s", boost::program_options::value(&data_size)->default_value(data_size), "size of transferred data in byte (multiple of 4)") + ; + + boost::program_options::variables_map vm; + + boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(desc).allow_unregistered().run(), vm); + boost::program_options::notify(vm); + + ham::offload::node_t target = 1; + + // used to avoid benchmarking memory allocation for the target object on the target side + set_cheese(data_size); + ham::offload::ping(target, f2f(&set_cheese, data_size)); + + statistics comp_time(runs, warmup_runs); + statistics put_time(runs, warmup_runs); + statistics call_time(runs, warmup_runs); + statistics get_time(runs, warmup_runs); + statistics os_time(runs, warmup_runs); + statistics is_time(runs, warmup_runs); + + for (int i = 0; i < (runs + warmup_runs) ; ++i) { + //cheese::d1[1337] = 'a'; + timer comp; + ham::offload::stream::ostream hos(target, cheese::cheese_size); + timer ost; + { + cereal::BinaryOutputArchive oarchive(hos); + oarchive(cereal::binary_data(cheese::d1, sizeof(char)*data_size)); //sizeof(char)*data_size) + } + os_time.add(ost); + timer put; + auto out_proxy = hos.sync(); + put_time.add(put); + timer call; + auto in_proxy = ham::offload::sync(target, f2f(&offloaded_fun, out_proxy)); + call_time.add(call); + timer get; + ham::offload::stream::istream his(in_proxy); + 
get_time.add(get); + timer ist; + { + cereal::BinaryInputArchive iarchive(his); + iarchive(cereal::binary_data(cheese::d1, sizeof(char)*data_size)); + } + is_time.add(ist); + comp_time.add(comp); + //assert(cheese::d1[1337] == 'b'); + } + + std::string header_string = "name\t" + statistics::header_string() + "\tdata_size"; + + cout << endl <<"HAM-Offload stream overall: " << endl + << header_string << endl + << "stream:\t" << comp_time.string() << "\t" << data_size << endl << endl; + cout << "HAM-Offload streams ostream: " << endl + << header_string << endl + << "stream:\t" << os_time.string() << "\t" << data_size << endl << endl; + cout << "HAM-Offload streams copy-in: " << endl + << header_string << endl + << "stream:\t" << put_time.string() << "\t" << data_size << endl << endl; + cout << "HAM-Offload streamed call: " << endl + << header_string << endl + << "stream:\t" << call_time.string() << "\t" << data_size << endl << endl; + cout << "HAM-Offload streamed copy-out: " << endl + << header_string << endl + << "stream:\t" << get_time.string() << "\t" << data_size << endl << endl; + cout << "HAM-Offload streamed istream: " << endl + << header_string << endl + << "stream:\t" << is_time.string() << "\t" << data_size << endl << endl; + + + statistics str_time(1, 0); + + ham::offload::stream::ostream hos(target, cheese::cheese_size); + { + cereal::BinaryOutputArchive oarchive(hos); + oarchive(cereal::binary_data(cheese::d1, sizeof(char)*data_size)); //sizeof(char)*data_size) + } + timer str_tim; + + string tmp = hos.rdbuf()->str(); + + str_time.add(str_tim); + statistics cpy_time(1, 0); + statistics cstr_time(1, 0); + timer cpy_tim; + memcpy((void *) cheese::d1, tmp.c_str(), cheese::cheese_size); + cpy_time.add(cpy_tim); + timer cstr_tim; + const char* asdf = tmp.c_str(); + cstr_time.add(cstr_tim); + cout << str_time.string() << endl; + cout << cpy_time.string() << endl; + cout << cstr_time.string() << endl; + return 0; +} + diff --git 
a/src/ham/net/communicator_mpi_rma_dynamic_data_only.cpp b/src/ham/net/communicator_mpi_rma_dynamic_data_only.cpp new file mode 100644 index 0000000..e4e5dbd --- /dev/null +++ b/src/ham/net/communicator_mpi_rma_dynamic_data_only.cpp @@ -0,0 +1,9 @@ +// Copyright (c) 2013-2014 Matthias Noack (ma.noack.pr@gmail.com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include "ham/net/communicator.hpp" + +ham::net::communicator* ham::net::communicator::instance_ = nullptr; + From 341523605f3134e7d74c0f80d96c5e55f123e3e6 Mon Sep 17 00:00:00 2001 From: Daniel Deppisch Date: Sat, 8 Jun 2019 16:46:48 +0200 Subject: [PATCH 150/150] cleanup --- include/ham/net/communicator.hpp | 4 +- .../ham/net/communicator_mpi_rma_dynamic.hpp | 552 +++++++----------- ...communicator_mpi_rma_dynamic_data_only.hpp | 106 ++-- include/ham/offload/offload.hpp | 16 +- include/ham/offload/offload_msg.hpp | 100 ++-- src/CMakeLists.txt | 40 +- src/ham/CMakeLists.txt | 37 +- 7 files changed, 375 insertions(+), 480 deletions(-) diff --git a/include/ham/net/communicator.hpp b/include/ham/net/communicator.hpp index 390279a..ea410f8 100644 --- a/include/ham/net/communicator.hpp +++ b/include/ham/net/communicator.hpp @@ -58,10 +58,12 @@ namespace net { #include "ham/net/communicator_scif.hpp" #elif defined HAM_COMM_MPI_RMA_DYNAMIC #include "ham/net/communicator_mpi_rma_dynamic.hpp" +#elif defined HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY +#include "ham/net/communicator_mpi_rma_dynamic_data_only.hpp" #elif defined HAM_COMM_TCP #include "ham/net/communicator_tcp.hpp" #else -static_assert(false, "Please define either HAM_COMM_MPI, HAM_COMM_MPI_RMA_DYNAMIC or HAM_COMM_SCIF."); +static_assert(false, "Please define either HAM_COMM_MPI, HAM_COMM_SCIF, HAM_COMM_MPI_RMA_DYNAMIC, HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY or HAM_COMM_TCP"); #endif #endif // ham_net_communicator_hpp diff --git 
a/include/ham/net/communicator_mpi_rma_dynamic.hpp b/include/ham/net/communicator_mpi_rma_dynamic.hpp index 4c4bb65..e74c1ba 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic.hpp @@ -17,7 +17,6 @@ #include "ham/misc/types.hpp" #include "ham/util/debug.hpp" #include "ham/util/log.hpp" -// #include "ham/util/time.hpp" #include "communicator.hpp" namespace ham { @@ -27,15 +26,15 @@ template class buffer_ptr { public: buffer_ptr(); - buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node), mpi_address_(0) { } + buffer_ptr(T* ptr, node_t node) : ptr_(ptr), node_(node), mpi_address_(0) { } buffer_ptr(T* ptr, node_t node, MPI_Aint mpi_address) : ptr_(ptr), node_(node), mpi_address_(mpi_address) { } T* get() { return ptr_; } node_t node() { return node_; } - MPI_Aint get_mpi_address() { return mpi_address_; } + MPI_Aint get_mpi_address() { return mpi_address_; } - // element access + // element access T& operator [] (size_t i); // basic pointer arithmetic to address sub-buffers @@ -47,15 +46,12 @@ class buffer_ptr { private: T* ptr_; node_t node_; - MPI_Aint mpi_address_; + MPI_Aint mpi_address_; }; class node_descriptor { public: - //node_descriptor() : name(MPI_MAX_PROCESSOR_NAME, 0) {} - - //const std::string& name() const { return name_; } const char* name() const { return name_; } private: //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable @@ -67,11 +63,11 @@ class node_descriptor class communicator { public: enum { - NO_BUFFER_INDEX = constants::MSG_BUFFERS, // invalid buffer index (max valid + 1) - FLAG_FALSE = constants::MSG_BUFFERS + 1 // special value, outside normal index range - }; + NO_BUFFER_INDEX = constants::MSG_BUFFERS, // invalid buffer index (max valid + 1) + FLAG_FALSE = constants::MSG_BUFFERS + 1 // special value, outside normal index range + }; - // externally used interface of request must be shared across 
all communicator-implementations + // externally used interface of request must be shared across all communicator-implementations class request { public: request() : valid_(false) {} // instantiate invalid @@ -81,38 +77,37 @@ class communicator { {} // return true if request was finished - // will not work as intended for rma ops, no equivalent to test() available for remote completion + // will not work as intended for rma ops, no equivalent to test() available for remote completion bool test() { - if(data_transfer_type) { // this will be true for rma data transfers - int flag = 0; - MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // test on RGET is what we want, because local completion = full completion for get, but for RPut local is not enough and there is no non-blocking remote-completion test - HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma put remote completion" << std::endl; ) - // TODO - Daniel: this is bad but MPI RMA doesn't have anything better - // TODO - Daniel: discuss preliminary design decision with Matthias: false positive + longer block = better than false negative as users may poll on this and get stuck - return flag != 0; - } - return communicator::instance().test_local_flag(target_node, local_buffer_index); + if(data_transfer_type) { // this will be true for rma data transfers + int flag = 0; + MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // test on RGET is what we want, because local completion = full completion for get, but for RPut local is not enough and there is no non-blocking remote-completion test + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma put remote completion" << std::endl; ) + // TODO - Daniel: this is bad but MPI RMA doesn't have anything better + return flag != 0; + } + return communicator::instance().test_local_flag(target_node, local_buffer_index); } void* get() // blocks { - if(data_transfer_type) { - HAM_DEBUG( HAM_LOG << 
"request::get(), before MPI_Waitall()" << std::endl; ) - MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // Get will have fully completed - HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) - if(data_transfer_type == constants::DATA_PUT_CODE) { - HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Win_flush()" << std::endl; ) - communicator::instance().flush_data(target_node); - HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Win_flush()" << std::endl; ) - } - // this will only be true for async rma GETs - // there will be no result returned, so this won't poll on anything and return a dummy instead. - return nullptr; - // TODO - Daniel: this is bad but MPI RMA doesn't have anything better - } else { - return communicator::instance().recv_msg(target_node, local_buffer_index); - } + if(data_transfer_type) { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) + MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // Get will have fully completed + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) + if(data_transfer_type == constants::DATA_PUT_CODE) { + HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Win_flush()" << std::endl; ) + communicator::instance().flush_data(target_node); + HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Win_flush()" << std::endl; ) + } + // this will only be true for async rma GETs + // there will be no result returned, so this won't poll on anything and return a dummy instead. 
+ return nullptr; + // TODO - Daniel: this is bad but MPI RMA doesn't have anything better + } else { + return communicator::instance().recv_msg(target_node, local_buffer_index); + } } template @@ -120,8 +115,6 @@ class communicator { { assert(communicator::this_node() == target_node); // this assert fails if send_result is called from the wrong side - // TODO(improvement, low priority): better go through communicator, such that no MPI calls are anywhere else - // MPI_Send(result_msg, size, MPI_BYTE, source_node, constants::RESULT_TAG, MPI_COMM_WORLD); communicator::instance().send_msg(source_node, local_buffer_index, NO_BUFFER_INDEX, result_msg, size); } @@ -130,10 +123,10 @@ class communicator { return valid_; } - bool is_rma_data_transfer() const - { - return data_transfer_type; - } + bool is_rma_data_transfer() const + { + return data_transfer_type; + } MPI_Request& next_mpi_request() { @@ -145,7 +138,7 @@ class communicator { node_t target_node; node_t source_node; bool valid_; - short data_transfer_type; + short data_transfer_type; // only needed by the sender enum { NUM_REQUESTS = 3 }; @@ -194,121 +187,80 @@ class communicator { MPI_Get_processor_name(node_description.name_, &count); node_description.name_[count] = 0x0; // null terminate -// char hostname[MPI_MAX_PROCESSOR_NAME + 1]; -// MPI_Get_processor_name(hostname, &count); -// hostname[count] = 0x0; // null terminate -// node_description.name_.assign(hostname, count); - - // append rank for testing: - //node_description.name_[count] = 48 + this_node_; - //node_description.name_[count+1] = 0x0; - // communicate descriptors between nodes HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions" << std::endl; ) - //MPI_Alltoall(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), MPI_BYTE, MPI_COMM_WORLD); MPI_Allgather(&node_description, sizeof(node_descriptor), MPI_BYTE, node_descriptions.data(), sizeof(node_descriptor), 
MPI_BYTE, MPI_COMM_WORLD); HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) - /* - if (is_host()) { - - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - // allocate buffers - peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - // fill resource pools - for (size_t j = constants::MSG_BUFFERS; j > 0; --j) { - peers[i].buffer_pool.add(j - 1); - } - } - }*/ - - // initialise data windows - for (node_t i = 0; i < nodes_; ++i) { - // dynamic data window - MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_data_win)); - } - - // initialise message windows - for (node_t i = 0; i < nodes_; ++i) { // loop through ranks - - if (i == this_node_) { // create local windows with allocated memory for targets, host creates one inbound set of windows for all targets - - // allocate memory and create windows - if (this_node_ == host_node_) { // host creates one large window with subsets associated with different targets - - // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS * num_nodes = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) - peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); - // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); - // set flags to FLAG_FALSE - reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS * nodes_); // structs are contiguos, this is ok - - // fill resource pools for managing indices on the host - for (size_t j = 0; j < nodes_; ++j) { - for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { - // target buffers - peers[j].local_buffer_pool.add(k - 1); - peers[j].remote_buffer_pool.add(k - 1); - } - // allocate first next_request, - allocate_next_request(j); - } - // create window with memory - MPI_Win_create((peers[this_node_].msg_flag_data.get()), 
sizeof(msg_flag_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); - // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); - - } else { // targets create one window with the size of their msg "queue" - // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) - peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - // set flags to FLAG_FALSE - reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS); - - // create window with memory - MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); - // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); - } - - // debug msg - HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " created REAL windows..." << std::endl; ) - - - } else { // create remote windows without memory (join the collective call and retreive the window handle) - - MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_flag_win)); - // MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].flag_win)); - // debug msg - HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " creating EMPTY windows..." 
<< std::endl; ) - //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].msg_win_data, &(peers[i].rma_msg_win)); - //MPI_Win_allocate(0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, peers[i].flag_win_data, &(peers[i].rma_flag_win)); - } - } + + // initialise data windows + for (node_t i = 0; i < nodes_; ++i) { + // dynamic data window + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_data_win)); + } + + // initialise message windows + for (node_t i = 0; i < nodes_; ++i) { // loop through ranks + + if (i == this_node_) { // create local windows with allocated memory for targets, host creates one inbound set of windows for all targets + + // allocate memory and create windows + if (this_node_ == host_node_) { // host creates one large window with subsets associated with different targets + + // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS * num_nodes = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) + peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS * nodes_, this_node_); + // set flags to FLAG_FALSE + reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS * nodes_); // structs are contiguos, this is ok + + // fill resource pools for managing indices on the host + for (size_t j = 0; j < nodes_; ++j) { + for (size_t k = constants::MSG_BUFFERS; k > 0; --k) { + // target buffers + peers[j].local_buffer_pool.add(k - 1); + peers[j].remote_buffer_pool.add(k - 1); + } + // allocate first next_request, + allocate_next_request(j); + } + // create window with memory + MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_flag_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); + // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS * nodes_, 1, MPI_INFO_NULL, MPI_COMM_WORLD, 
&(peers[this_node_].flag_win)); + + } else { // targets create one window with the size of their msg "queue" + // (MSG_SIZE+FLAG_SIZE) * MSG_BUFFERS = bytes of memory allocated (sizes are implicit in msg_flag_buffer struct) + peers[this_node_].msg_flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // peers[this_node_].flag_data = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // set flags to FLAG_FALSE + reset_flags(peers[this_node_].msg_flag_data, constants::MSG_BUFFERS); + + // create window with memory + MPI_Win_create((peers[this_node_].msg_flag_data.get()), sizeof(msg_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].msg_flag_win)); + // MPI_Win_create((peers[this_node_].flag_data.get()), sizeof(cache_line_buffer) * constants::MSG_BUFFERS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[this_node_].flag_win)); + } + + // debug msg + HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " created REAL windows..." << std::endl; ) + + } else { // create remote windows without memory (join the collective call and retreive the window handle) + + MPI_Win_create(nullptr, 0, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].msg_flag_win)); + + // debug msg + HAM_DEBUG( HAM_LOG << "Rank: " << this_node_ << " in loop run " << i << " creating EMPTY windows..." 
<< std::endl; ) + } + } // get all locks to targets for data - // targets lock to other targets for copies - for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - if (i != this_node_) { - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_data_win); // shared locks because all ranks lock on every target concurrently - } - } - - // MPI_Barrier(MPI_COMM_WORLD); - - - /* // locking will be done when accessing remote memory - // locks for active message rma transfers - if (this_node_ != host_node_) { // targets - MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].msg_win); - MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, peers[0].flag_win); - } else { // host - for (node_t i = 0; i < nodes_; ++i) { - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].msg_win); - MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].flag_win); - } - } - */ - - HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation completed" << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::communicator(): communicator initialization completed" << std::endl; ) + // targets lock to other targets for copies + for (node_t i = 0; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + if (i != this_node_) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_data_win); // shared locks because all ranks lock on every target concurrently + } + } + + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation completed" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): communicator initialization completed" << std::endl; ) } ~communicator() @@ -317,162 +269,118 @@ class communicator { HAM_DEBUG( HAM_LOG << "~communicator" << std::endl; ) } - // this is only used by the host to manage remote msg buffers and local reply buffers and assign them to requests - const request& allocate_next_request(node_t remote_node) - { - // this allocates a host-managed index for the remote nodes msg 
"queue" - // so the host knows which buffers are available on the target - const size_t remote_buffer_index = peers[remote_node].remote_buffer_pool.allocate(); - // this allocates an index in the hosts "reply queue" - // request is included in offload message, so the target knows into which buffers replys must be written - // when used, the index will need to be added to an offset determined by a targets rank to address the part of the buffer belonging to this target - // NOTE: the actual host buffer is stored at the hosts peers[0], but the buffer_pools are stored at the corresponding peers[target] - // buffer_pools manage idices within the targets section of the hosts buffer - const size_t local_buffer_index = peers[remote_node].local_buffer_pool.allocate(); - - peers[remote_node].next_request = {remote_node, this_node_, remote_buffer_index, local_buffer_index}; - - return peers[remote_node].next_request; - } - - // only used by host + // this is only used by the host to manage remote msg buffers and local reply buffers and assign them to requests + const request& allocate_next_request(node_t remote_node) + { + // this allocates a host-managed index for the remote nodes msg "queue" + // so the host knows which buffers are available on the target + const size_t remote_buffer_index = peers[remote_node].remote_buffer_pool.allocate(); + // this allocates an index in the hosts "reply queue" + // request is included in offload message, so the target knows into which buffers replys must be written + // when used, the index will need to be added to an offset determined by a targets rank to address the part of the buffer belonging to this target + // NOTE: the actual host buffer is stored at the hosts peers[0], but the buffer_pools are stored at the corresponding peers[target] + // buffer_pools manage idices within the targets section of the hosts buffer + const size_t local_buffer_index = peers[remote_node].local_buffer_pool.allocate(); + + peers[remote_node].next_request 
= {remote_node, this_node_, remote_buffer_index, local_buffer_index}; + + return peers[remote_node].next_request; + } + + // only used by host request allocate_request(node_t remote_node) { - HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) return peers[remote_node].next_request; } - // used for rma data transfers, so they wont take up unneeded buffer indices - // only put() and get() use this, copy() offloads an active msg to the data source and therefore uses allocate_request() - request allocate_data_request(node_t remote_node) { - HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) - return { remote_node, this_node_, NO_BUFFER_INDEX, NO_BUFFER_INDEX }; - } + // used for rma data transfers, so they wont take up unneeded buffer indices + // only put() and get() use this, copy() offloads an active msg to the data source and therefore uses allocate_request() + request allocate_data_request(node_t remote_node) { + HAM_DEBUG( HAM_LOG << "communicator::allocate_next_request(): remote_node = " << remote_node << std::endl; ) + return { remote_node, this_node_, NO_BUFFER_INDEX, NO_BUFFER_INDEX }; + } - // only used by host + // only used by host void free_request(request& req) { assert(req.valid()); assert(req.source_node == this_node_); - // dont do any of the following for data transfer requests - if(req.remote_buffer_index == NO_BUFFER_INDEX ) { - return; - } + // dont do any of the following for data transfer requests + if(req.remote_buffer_index == NO_BUFFER_INDEX ) { + return; + } - mpi_peer& peer = peers[req.target_node]; + mpi_peer& peer = peers[req.target_node]; - // reset local flag - // local flag is inside the hosts large array of msg_flag_buffers @ peers[host] - // index offset computed using target node - // as this is an access to 
rma window memory, we need to lock again... - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); - size_t offset = constants::MSG_BUFFERS * req.target_node; // offset msg_flag_buffers to the corresponding nodes region - peers[this_node_].msg_flag_data.get()[offset + req.local_buffer_index].flag = FLAG_FALSE; - MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); - // remote flag on target - /* This is done by the target after having received the new index to poll on - size_t remote_flag = FLAG_FALSE; - MPI_Put(&remote_flag, sizeof(remote_flag), MPI_BYTE, req.target_node, 0, sizeof(remote_flag), MPI_BYTE, peer.flag_win); - // flush? don't think so - */ + // reset local flag + // local flag is inside the hosts large array of msg_flag_buffers @ peers[host] + // index offset computed using target node + // as this is an access to rma window memory, we need to lock again... + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); + size_t offset = constants::MSG_BUFFERS * req.target_node; // offset msg_flag_buffers to the corresponding nodes region + peers[this_node_].msg_flag_data.get()[offset + req.local_buffer_index].flag = FLAG_FALSE; + MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); - peer.remote_buffer_pool.free(req.remote_buffer_index); + peer.remote_buffer_pool.free(req.remote_buffer_index); - peer.local_buffer_pool.free(req.local_buffer_index); + peer.local_buffer_pool.free(req.local_buffer_index); - req.valid_ = false; - } + req.valid_ = false; + } public: - // make private?! 
- // called by func below - void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { - // write msg to target msg buffer - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): node = " << node << std::endl; ) - HAM_DEBUG( HAM_LOG << "communicator::send_msg(): remote buffer index = " << buffer_index << std::endl; ) - - if (node != host_node_) { // to targets - // ham::util::time::statistics msg_put(1,0); - // ham::util::time::statistics flush(1,0); - // ham::util::time::statistics flag_put(1,0); - - // ham::util::time::timer t1; - MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_flag_win); - // put msg + // make private?! + // called by func below + void send_msg(node_t node, size_t buffer_index, size_t next_buffer_index, void* msg, size_t size) { + // write msg to target msg buffer + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): node = " << node << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): remote buffer index = " << buffer_index << std::endl; ) + + if (node != host_node_) { // to targets + + MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_flag_win); + // put msg MPI_Put(msg, size, MPI_BYTE, node, sizeof(msg_flag_buffer) * buffer_index, size, MPI_BYTE, peers[node].msg_flag_win); // put flag MPI_Put(&next_buffer_index, sizeof(next_buffer_index), MPI_BYTE, node, sizeof(msg_flag_buffer) * buffer_index + constants::MSG_SIZE, sizeof(next_buffer_index), MPI_BYTE, peers[node].msg_flag_win); - // msg_put.add(t1); + MPI_Win_unlock(node, peers[node].msg_flag_win); + HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote msg + flag" << std::endl; ) - // unlock includes flush, no need for it here - // ham::util::time::timer t2; - // MPI_Win_flush(node, peers[node].msg_win); - // flush.add(t2); - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushed msg" << std::endl; ) - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): flushing msg took: " << ""/*flush.min().count()*/ << std::endl; ) - - 
// write flag to target flags buffer - // not sure on the size here? - // ham::util::time::timer t3; - // MPI_Win_lock(MPI_LOCK_EXCLUSIVE, node, 0, peers[node].msg_flag_win); - // flag_put.add(t3); - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): wrote flag" << std::endl; ) - // HAM_DEBUG( HAM_LOG << "communicator::send_msg(): writing flag took: " << ""/*flag_put.min().count()*/ <(&buffer); */ - return static_cast(recv_msg(host_node_, NO_BUFFER_INDEX, msg, size)); + return static_cast(recv_msg(host_node_, NO_BUFFER_INDEX, msg, size)); } // trigger asyncly receiving the result of a message on the sending side @@ -528,27 +432,27 @@ class communicator { // nothing to do here, since this communicator implementation uses one-sided communication // the data will be written to where it is expected HAM_DEBUG( HAM_LOG << "communicator::recv_result(): This does nothing with the MPI RMA communicator" << std::endl; ) - return; + return; } - // only used by the host through request.test() (top of this file) called by future.test() (offload.hpp) - bool test_local_flag(node_t node, size_t buffer_index) { + // only used by the host through request.test() (top of this file) called by future.test() (offload.hpp) + bool test_local_flag(node_t node, size_t buffer_index) { size_t offset = constants::MSG_BUFFERS * node; flag_t temp_flag = FLAG_FALSE; // public window flag changes may have not have been updated in local window... 
so we need to lock again here MPI_Win_lock(MPI_LOCK_EXCLUSIVE, this_node_, 0, peers[this_node_].msg_flag_win); temp_flag = peers[node].msg_flag_data.get()[offset + buffer_index].flag; MPI_Win_unlock(this_node_, peers[this_node_].msg_flag_win); - return temp_flag != FLAG_FALSE; - } + return temp_flag != FLAG_FALSE; + } - void flush_data(node_t node) { - MPI_Win_flush(node, peers[node].rma_data_win); - } + void flush_data(node_t node) { + MPI_Win_flush(node, peers[node].rma_data_win); + } // this is only called @ communicator construction to initialize flags with FLAG_FALSE // calling this at any other point may reset flags belonging to messages that have not yet been executed (and never will be then) - void reset_flags(buffer_ptr msg_flags, size_t size) { + void reset_flags(buffer_ptr msg_flags, size_t size) { // now this is where a struct of arrays would have been cooler... // TODO - Daniel: Ask Matthias if he knows a cooler solution for (int i = 0; i <= size ; ++i) { @@ -562,15 +466,15 @@ class communicator { template void send_data(T* local_source, buffer_ptr remote_dest, size_t size) { // execute transfer - MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win); - MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_data_win); + MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_data_win); } // to be used by the host only template void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { - req.data_transfer_type = constants::DATA_PUT_CODE; - MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win, 
&req.next_mpi_request()); + req.data_transfer_type = constants::DATA_PUT_CODE; + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_data_win, &req.next_mpi_request()); } // not used in MPI RMA backend @@ -587,8 +491,8 @@ class communicator { // to be used by the host template void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { - req.data_transfer_type = constants::DATA_GET_CODE; - MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); + req.data_transfer_type = constants::DATA_GET_CODE; + MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_data_win, &req.next_mpi_request()); } template @@ -596,11 +500,10 @@ class communicator { { T* ptr; - // posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); MPI_Alloc_mem(n * sizeof(T), MPI_INFO_NULL, &ptr); - // attach to own window - HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << std::endl; ) - MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); + // attach to own window + HAM_DEBUG( HAM_LOG << "communicator::allocate_buffer(), allocating buffer @: " << (long)ptr << std::endl; ) + MPI_Win_attach(peers[this_node_].rma_data_win, (void*)ptr, n * sizeof(T)); MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); @@ -612,38 +515,32 @@ class communicator { template buffer_ptr allocate_peer_buffer(const size_t n, node_t source_node) { - T* ptr; - // posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); // if you revert to memalign, also change back free in free_peer_buffer() - // using MPI_Alloc instead 
as these buffers are used for RMA accesses + T* ptr; + // using MPI_Alloc instead of posix_memalign as these buffers are used for RMA accesses MPI_Alloc_mem(n * sizeof(T), MPI_INFO_NULL, &ptr); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_); } - // used for data buffers only + // used for data buffers only template void free_buffer(buffer_ptr ptr) { assert(ptr.node() == this_node_); // NOTE: no dtor is called - // remove from own rma window - HAM_DEBUG( HAM_LOG << "communicator::free_buffer(), freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) - MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); - /* for (node_t i = 1; i < nodes_; ++i) { // nonsense, all accesses to a rank will only take place on that targets window, no need to attach to other - MPI_Win_detach(peers[i].rma_data_win, ptr.get()); - } */ - // free(static_cast(ptr.get())); // switch back to this if you revert back from using MPI_alloc_mem() + // remove from own rma window + HAM_DEBUG( HAM_LOG << "communicator::free_buffer(), freeing buffer @: " << (long)ptr.get() << " belonging to node: " << ptr.node() << std::endl; ) + MPI_Win_detach(peers[this_node_].rma_data_win, ptr.get()); MPI_Free_mem(ptr.get()); } - // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers + // for host to free peer message buffers, needed because original function now manages rma window which must not happen for host-only local buffers template void free_peer_buffer(buffer_ptr ptr) { - // this will never be called on the actual memory mapped to static mpi windows, freeing it would equal "disconnecting" the corresponding target + // this must never be called on the actual memory mapped to static mpi windows, freeing it would equal "disconnecting" the corresponding target assert(ptr.node() == this_node_); // NOTE: no dtor is called - // free(static_cast(ptr.get())); // switch 
back to this if you revert back from using MPI_alloc_mem() MPI_Free_mem(ptr.get()); } @@ -665,30 +562,27 @@ class communicator { node_t host_node_; std::vector node_descriptions; // not as member in peer below, because Allgather is used to exchange node descriptions - struct mpi_peer { - - // buffer_ptr msg_buffers; // buffers used for MPI_ISend and IRecv by the sender // not needed for RMA version, host-side RMA window is used instead - + struct mpi_peer { // needed by sender to manage which buffers are in use and which are free // just manages indices, that can be used by detail::resource_pool local_buffer_pool; - detail::resource_pool remote_buffer_pool; + detail::resource_pool remote_buffer_pool; - request next_request; - size_t next_flag = 0; - // NOTE: behind these buffers are MSG_BUFFERS many buffers of size MSG_SIZE+FLAG_SIZE, indices are managed by buffer_pool + request next_request; + size_t next_flag = 0; + // NOTE: behind these buffers are MSG_BUFFERS many buffers of size MSG_SIZE+FLAG_SIZE, indices are managed by buffer_pool - // static window for inbound rma messages and their flags - buffer_ptr msg_flag_data; - MPI_Win msg_flag_win; + // static window for inbound rma messages and their flags + buffer_ptr msg_flag_data; + MPI_Win msg_flag_win; // mpi rma dynamic window for data MPI_Win rma_data_win; }; - mpi_peer* peers; - }; + +}; template buffer_ptr::buffer_ptr() : buffer_ptr(nullptr, communicator::this_node()) { } @@ -703,4 +597,4 @@ T& buffer_ptr::operator[](size_t i) } // namespace net } // namespace ham -#endif // ham_net_communicator_mpi_hpp +#endif // ham_net_communicator_mpi_rma_dynamic_hpp diff --git a/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp b/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp index 4dff738..eb2c762 100644 --- a/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp +++ b/include/ham/net/communicator_mpi_rma_dynamic_data_only.hpp @@ -3,8 +3,8 @@ // Distributed under the Boost Software License, 
Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef ham_net_communicator_mpi_rma_dynamic_hpp -#define ham_net_communicator_mpi_rma_dynamic_hpp +#ifndef ham_net_communicator_mpi_rma_dynamic_data_only_hpp +#define ham_net_communicator_mpi_rma_dynamic_data_only_hpp #include @@ -51,9 +51,6 @@ class buffer_ptr { class node_descriptor { public: - //node_descriptor() : name(MPI_MAX_PROCESSOR_NAME, 0) {} - - //const std::string& name() const { return name_; } const char* name() const { return name_; } private: //std::string name_; // TODO(improvement): unify node description for all back-ends, NOTE: std::string is not trivally transferable @@ -74,18 +71,17 @@ class communicator { {} // return true if request was finished - // will not work as intended for rma ops, no equivalent to test() available for remote completion + // will not work as intended for rma ops, no equivalent to test() available for remote completion bool test() { int flag = 0; MPI_Testall(req_count, mpi_reqs, &flag, MPI_STATUS_IGNORE); // just test the receive request, since the send belonging to the request triggers the remote send that is received - if(uses_rma_) - { - HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) - } - - return flag != 0; + if(uses_rma_) + { + HAM_DEBUG( HAM_LOG << "request::test(), warning: may give false positive on rma remote completion" << std::endl; ) + } + return flag != 0; } void* get() // blocks @@ -93,10 +89,10 @@ class communicator { HAM_DEBUG( HAM_LOG << "request::get(), before MPI_Waitall()" << std::endl; ) MPI_Waitall(req_count, mpi_reqs, MPI_STATUS_IGNORE); // must wait for all requests to satisfy the standard HAM_DEBUG( HAM_LOG << "request::get(), after MPI_Waitall()" << std::endl; ) - if(uses_rma_) - { - MPI_Win_flush(target_node, communicator::instance().peers[target_node].rma_win); - } + if(uses_rma_) + { + MPI_Win_flush(target_node, 
communicator::instance().peers[target_node].rma_win); + } return static_cast(&communicator::instance().peers[target_node].msg_buffers[recv_buffer_index]); } @@ -114,10 +110,10 @@ class communicator { return valid_; } - bool uses_rma() const - { - return uses_rma_; - } + bool uses_rma() const + { + return uses_rma_; + } MPI_Request& next_mpi_request() { @@ -129,7 +125,7 @@ class communicator { node_t target_node; node_t source_node; bool valid_; - bool uses_rma_; + bool uses_rma_; // only needed by the sender enum { NUM_REQUESTS = 3 }; @@ -184,32 +180,32 @@ class communicator { HAM_DEBUG( HAM_LOG << "communicator::communicator(): gathering node descriptions done" << std::endl; ) - if (is_host()) { + if (is_host()) { - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - // allocate buffers - peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); - // fill resource pools - for (size_t j = constants::MSG_BUFFERS; j > 0; --j) { - peers[i].buffer_pool.add(j - 1); - } - } - } + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + // allocate buffers + peers[i].msg_buffers = allocate_peer_buffer(constants::MSG_BUFFERS, this_node_); + // fill resource pools + for (size_t j = constants::MSG_BUFFERS; j > 0; --j) { + peers[i].buffer_pool.add(j - 1); + } + } + } - // initialise 1 global window per target for data - for (node_t i = 1; i < nodes_; ++i) { - MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); - } + // initialise 1 global window per target for data + for (node_t i = 1; i < nodes_; ++i) { + MPI_Win_create_dynamic(MPI_INFO_NULL, MPI_COMM_WORLD, &(peers[i].rma_win)); + } - // get all locks to targets - // targets lock to other targets for copies - for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable - if(i != this_node_) { - 
MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks because all ranks lock on every target concurrently - } - } + // get all locks to targets + // targets lock to other targets for copies + for (node_t i = 1; i < nodes_; ++i) { // TODO(improvement): needs to be changed when host-rank becomes configurable + if(i != this_node_) { + MPI_Win_lock(MPI_LOCK_SHARED, i, 0, peers[i].rma_win); // shared locks because all ranks lock on every target concurrently + } + } - HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) + HAM_DEBUG( HAM_LOG << "communicator::communicator(): rma window creation done" << std::endl; ) } @@ -257,7 +253,7 @@ class communicator { { static msg_buffer buffer; // NOTE ! MPI_Recv(&buffer, size, MPI_BYTE, host_node_, constants::DEFAULT_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - return static_cast(&buffer); + return static_cast(&buffer); } // trigger receiving the result of a message on the sending side @@ -277,17 +273,16 @@ class communicator { { // execute transfer MPI_Put(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win); - MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); + MPI_Win_flush(remote_dest.node(), peers[remote_dest.node()].rma_win); } // to be used by the host only template void send_data_async(request_reference_type req, T* local_source, buffer_ptr remote_dest, size_t size) { - req.uses_rma_ = true; + req.uses_rma_ = true; - // MPI_Win_lock(MPI_LOCK_SHARED, remote_dest.node(), 0, peers[remote_dest.node()].rma_win); - MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_dest.node()].rma_win, &req.next_mpi_request()); + MPI_Rput(local_source, size * sizeof(T), MPI_BYTE, remote_dest.node(), remote_dest.get_mpi_address(), size * sizeof(T), MPI_BYTE, 
peers[remote_dest.node()].rma_win, &req.next_mpi_request()); } // not used in MPI RMA backend @@ -305,8 +300,7 @@ class communicator { template void recv_data_async(request_reference_type req, buffer_ptr remote_source, T* local_dest, size_t size) { - req.uses_rma_ = true; - + req.uses_rma_ = true; MPI_Rget(local_dest, size * sizeof(T), MPI_BYTE, remote_source.node(), remote_source.get_mpi_address(), size * sizeof(T), MPI_BYTE, peers[remote_source.node()].rma_win, &req.next_mpi_request()); } @@ -315,9 +309,9 @@ class communicator { { T* ptr; posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); - // attach to own window - MPI_Win_attach(peers[this_node_].rma_win, (void*)ptr, n * sizeof(T)); - MPI_Aint mpi_address; + // attach to own window + MPI_Win_attach(peers[this_node_].rma_win, (void*)ptr, n * sizeof(T)); + MPI_Aint mpi_address; MPI_Get_address((void*)ptr, &mpi_address); // NOTE: no ctor is called return buffer_ptr(ptr, this_node_, mpi_address); @@ -338,8 +332,8 @@ class communicator { { assert(ptr.node() == this_node_); // NOTE: no dtor is called - // remove from own rma window - MPI_Win_detach(peers[this_node_].rma_win, ptr.get()); + // remove from own rma window + MPI_Win_detach(peers[this_node_].rma_win, ptr.get()); free(static_cast(ptr.get())); } @@ -397,4 +391,4 @@ T& buffer_ptr::operator[](size_t i) } // namespace net } // namespace ham -#endif // ham_net_communicator_mpi_hpp +#endif // ham_net_communicator_mpi_rma_dynamic_data_only_hpp diff --git a/include/ham/offload/offload.hpp b/include/ham/offload/offload.hpp index dafb6da..b96c988 100644 --- a/include/ham/offload/offload.hpp +++ b/include/ham/offload/offload.hpp @@ -237,7 +237,7 @@ future put(T* local_source, buffer_ptr& remote_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the msgs result // async return result; -#elif HAM_COMM_MPI_RMA_DYNAMIC +#elif defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) future 
result(comm.allocate_data_request(remote_dest.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA put..." << std::endl; ) comm.send_data_async(result.get_request(), local_source, remote_dest, n); @@ -275,7 +275,7 @@ future get(buffer_ptr remote_source, T* local_dest, size_t n) comm.recv_result(result.get_request()); // trigger receiving the result // TODO(improvement): the recv_result() is not needed, could remove and remove send_result() from offload_read_msg to reduce synchronization overhead return result; -#elif defined HAM_COMM_MPI_RMA_DYNAMIC +#elif defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) future result(comm.allocate_data_request(remote_source.node())); HAM_DEBUG( HAM_LOG << "offload::put(): initiating RMA get..." << std::endl; ) comm.recv_data_async(result.get_request(), remote_source, local_dest, n); @@ -312,7 +312,7 @@ void get_sync(buffer_ptr remote_source, T* local_dest, size_t n) //} -#ifdef HAM_COMM_MPI_RMA_DYNAMIC +#if defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) template future copy(buffer_ptr source, buffer_ptr dest, size_t n) { @@ -342,7 +342,7 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // fix 1st arg: // comm.send_data(src_node, local_source, remote_dest, n); // static_assert(false, "copy is not implemented yet for the SCIF back-end"); -#elif defined HAM_COMM_MPI +#elif defined(HAM_COMM_MPI) // send corresponding write and read messages to the sender and the receiver // issues a send operation on the source node, that sends the memory at source to the destination node @@ -361,10 +361,10 @@ void copy_sync(buffer_ptr source, buffer_ptr dest, size_t n) // TODO(improvement): this is oversynchronized, waiting for the target to complete receiving should be sufficient read_result.get(); write_result.get(); -#elif defined HAM_COMM_MPI_RMA_DYNAMIC - // use async copy + sync - copy(source, dest, n).get(); -#elif defined HAM_COMM_TCP +#elif 
defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) + // use async copy + sync + copy(source, dest, n).get(); +#elif defined(HAM_COMM_TCP) T* ptr; posix_memalign((void**)&ptr, constants::CACHE_LINE_SIZE, n * sizeof(T)); get_sync(source, ptr, n); diff --git a/include/ham/offload/offload_msg.hpp b/include/ham/offload/offload_msg.hpp index 01f4e9d..7e381b6 100644 --- a/include/ham/offload/offload_msg.hpp +++ b/include/ham/offload/offload_msg.hpp @@ -6,9 +6,11 @@ #ifndef ham_offload_offload_msg_hpp #define ham_offload_offload_msg_hpp -#ifdef HAM_COMM_MPI_RMA_DYNAMIC +// for the copy msg we want to store the remote memory address as MPI_Aint +#if defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) #include #endif + #include "ham/msg/active_msg.hpp" #include "ham/msg/execution_policy.hpp" #include "ham/misc/constants.hpp" @@ -43,6 +45,7 @@ struct helper { }; // executes the functor, and send back its result +// used for all offloads, remote allocation template class ExecutionPolicy = default_execution_policy> class offload_result_msg : public active_msg, ExecutionPolicy> @@ -68,6 +71,7 @@ class offload_result_msg }; // just execute the functor +// fire & forget, not used by current HAM-Offload API template class ExecutionPolicy = default_execution_policy> class offload_msg : public active_msg, ExecutionPolicy> @@ -83,7 +87,8 @@ class offload_msg } }; -// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used +// data transfer message type, triggers RECEIVING data at the target +// not used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_write_msg : public active_msg, ExecutionPolicy> @@ -97,7 +102,6 @@ class offload_write_msg communicator::instance().recv_data(buffer_ptr(nullptr, remote_node), local_dest, n); // NOTE: Why nullptr? 
This is for two-sided communicators, so we do not know the remote address, but match a send operation that has the address. // send a result to tell the sender, that the transfer is done - // TODO(improvement): this may be if (req.valid()) { req.send_result((void*)&n, sizeof n); } @@ -111,7 +115,8 @@ class offload_write_msg }; -// should not be used by MPI_RMA_COMMUNICATOR since one-sided put is used +// data transfer message type, triggers SENDING data at the target +// not used by MPI_RMA_COMMUNICATOR since one-sided put is used template class ExecutionPolicy = default_execution_policy> class offload_read_msg : public active_msg, ExecutionPolicy> @@ -125,7 +130,7 @@ class offload_read_msg communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node), n); // NOTE: Why nullptr? This is for two-sided communicators, so we do not know the remote address, but match a receive operation that has the address. // send a result message to tell the sender, that the transfer is done - // TODO(improvement): this may be removed along with receiving the result in offload get() + // TODO(improvement, potential speedup): this may be removed along with receiving the result in offload get(). 
For host-target transfer completion of receive is sufficient, for copy the destination informs the host of completion if (req.valid()) { req.send_result((void*)&n, sizeof n); } @@ -138,62 +143,37 @@ class offload_read_msg size_t n; }; -#ifdef HAM_COMM_MPI_RMA_DYNAMIC - template class ExecutionPolicy = default_execution_policy> - class offload_rma_copy_msg - : public active_msg, ExecutionPolicy> - { - public: - offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr, T* local_source, size_t n) - : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } - - void operator()() //const - { - /* communicator::instance().establish_rma_path(remote_node); // should quickly return if path already exists - // attach existing buffers to new target window ?!? - */ - communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); - - // send a result message to tell the sender, that the transfer is done - if (req.valid()) { - req.send_result((void*)&n, sizeof n); - } - } - private: - communicator::request req; // TODO(improvement, high priority): use a subset of req here! 
- - node_t remote_node; - MPI_Aint remote_addr; - T* local_source; - size_t n; - }; -#endif - -/* -// allows user to setup an rma link between two targets without a copy transfer -#ifdef HAM_COMM_MPI_RMA_DYNAMIC - template class ExecutionPolicy = default_execution_policy> - class setup_rma_path_msg - : public active_msg, ExecutionPolicy> - { - public: - setup_rma_path_msg(node_t remote_node) - : remote_node(remote_node) { } - - void operator()() //const - { - communicator::instance().establish_rma_path(remote_node); - - // send a result message to tell the sender that the path is set up - if (req.valid()) { - req.send_result((void*)&remote_node, sizeof remote_node); - } - } - private: - node_t remote_node; - }; +#if defined(HAM_COMM_MPI_RMA_DYNAMIC) || defined(HAM_COMM_MPI_RMA_DYNAMIC_DATA_ONLY) + // data transfer message, triggers RMA data transfer to copy target + // used only with MPI_RMA communicator + // necessary because of the target buffer's address (remote_addr) + template class ExecutionPolicy = default_execution_policy> + class offload_rma_copy_msg + : public active_msg, ExecutionPolicy> + { + public: + offload_rma_copy_msg(communicator::request req, node_t remote_node, MPI_Aint remote_addr, T* local_source, size_t n) + : req(req), remote_node(remote_node), remote_addr(remote_addr), local_source(local_source), n(n) { } + + void operator()() //const + { + // MPI_RMA_COMMUNICATOR-only variant of send_data(), because of buffer address (remote_addr) + communicator::instance().send_data(local_source, buffer_ptr(nullptr, remote_node, remote_addr), n); + + // send a result message to tell the sender, that the transfer is done + if (req.valid()) { + req.send_result((void*)&n, sizeof n); + } + } + private: + communicator::request req; // TODO(improvement, high priority): use a subset of req here! 
+ + node_t remote_node; + MPI_Aint remote_addr; // this is why we imported mpi.h + T* local_source; + size_t n; + }; #endif -*/ } // namespace detail } // namespace offload diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5dcdd7..98baf8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,8 +13,8 @@ add_subdirectory(ham) ## Explicit targets (not built by default) # TCP benchmarks -# add_executable(benchmark_ham_offload_tcp benchmark_ham_offload.cpp) -# target_link_libraries(benchmark_ham_offload_tcp ham_offload_tcp) +add_executable(benchmark_ham_offload_tcp benchmark_ham_offload.cpp) +target_link_libraries(benchmark_ham_offload_tcp ham_offload_tcp) # Intel LEO offload directive benchmark, requires Intel compiler if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") @@ -28,6 +28,9 @@ if (MPI_FOUND) add_executable (benchmark_ham_offload_mpi_rma_dynamic benchmark_ham_offload.cpp) target_link_libraries (benchmark_ham_offload_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) + + add_executable (benchmark_ham_offload_mpi_rma_dynamic_data_only benchmark_ham_offload.cpp) + target_link_libraries (benchmark_ham_offload_mpi_rma_dynamic_data_only ham_offload_mpi_rma_dynamic_data_only) endif() if (SCIF_FOUND) @@ -42,17 +45,17 @@ add_executable(active_msgs active_msgs.cpp) target_link_libraries(active_msgs ham_interface) # TCP tests -# add_executable(ham_offload_test_tcp ham_offload.cpp) -# target_link_libraries(ham_offload_test_tcp ham_offload_tcp) +add_executable(ham_offload_test_tcp ham_offload.cpp) +target_link_libraries(ham_offload_test_tcp ham_offload_tcp) -# add_executable(inner_product_tcp inner_product.cpp) -# target_link_libraries(inner_product_tcp ham_offload_tcp) +add_executable(inner_product_tcp inner_product.cpp) +target_link_libraries(inner_product_tcp ham_offload_tcp) -# add_executable(test_data_transfer_tcp test_data_transfer.cpp) -# target_link_libraries(test_data_transfer_tcp ham_offload_tcp) +add_executable(test_data_transfer_tcp 
test_data_transfer.cpp) +target_link_libraries(test_data_transfer_tcp ham_offload_tcp) -# add_executable(test_argument_transfer_tcp test_argument_transfer.cpp) -# target_link_libraries(test_argument_transfer_tcp ham_offload_tcp) +add_executable(test_argument_transfer_tcp test_argument_transfer.cpp) +target_link_libraries(test_argument_transfer_tcp ham_offload_tcp) if (MPI_FOUND) # two-sided MPI @@ -71,10 +74,11 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi ham_offload_mpi) +# Streaming Test MPI add_executable(test_streams_mpi test_streams.cpp) target_link_libraries(test_streams_mpi ham_offload_mpi) -# RMA MPI +# RMA DYNAMIC MPI (full) add_executable(ham_offload_test_mpi_rma_dynamic ham_offload.cpp) target_link_libraries(ham_offload_test_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) @@ -88,6 +92,20 @@ if (MPI_FOUND) add_executable(test_argument_transfer_mpi_rma_dynamic test_argument_transfer.cpp) target_link_libraries(test_argument_transfer_mpi_rma_dynamic ham_offload_mpi_rma_dynamic) +# RMA DYNAMIC MPI (data only) + + add_executable(ham_offload_test_mpi_rma_dynamic_data_only ham_offload.cpp) + target_link_libraries(ham_offload_test_mpi_rma_dynamic_data_only ham_offload_mpi_rma_dynamic_data_only) + + add_executable(inner_product_mpi_rma_dynamic_data_only inner_product.cpp) + target_link_libraries(inner_product_mpi_rma_dynamic_data_only ham_offload_mpi_rma_dynamic_data_only) + + add_executable(test_data_transfer_mpi_rma_dynamic_data_only test_data_transfer.cpp) + target_link_libraries(test_data_transfer_mpi_rma_dynamic_data_only ham_offload_mpi_rma_dynamic_data_only) + + add_executable(test_argument_transfer_mpi_rma_dynamic_data_only test_argument_transfer.cpp) + target_link_libraries(test_argument_transfer_mpi_rma_dynamic_data_only ham_offload_mpi_rma_dynamic_data_only) + endif() if (SCIF_FOUND) diff --git a/src/ham/CMakeLists.txt b/src/ham/CMakeLists.txt index 8108980..4a24c8b 
100644 --- a/src/ham/CMakeLists.txt +++ b/src/ham/CMakeLists.txt @@ -22,21 +22,21 @@ set(HAM_LIB_SRC util/cpu_affinity.cpp) # TCP -#add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE -# net/communicator.cpp -# net/communicator_tcp.cpp -# offload/runtime.cpp -# offload/offload.cpp -# offload/stream.cpp -# offload/main.cpp -# util/cpu_affinity.cpp) -#target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) -#target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library pthread) +add_library(ham_offload_tcp # SHARED if BUILD_SHARED_LIBS = TRUE + net/communicator.cpp + net/communicator_tcp.cpp + offload/runtime.cpp + offload/offload.cpp + offload/stream.cpp + offload/main.cpp + util/cpu_affinity.cpp) +target_compile_definitions(ham_offload_tcp PUBLIC -DHAM_COMM_TCP=1) +target_link_libraries(ham_offload_tcp PUBLIC ham_interface boost_library pthread) -#set_target_properties(ham_offload_tcp PROPERTIES -# CXX_STANDARD 11 -# CXX_STANDARD_REQUIRED YES -# CXX_EXTENSIONS NO) +set_target_properties(ham_offload_tcp PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED YES + CXX_EXTENSIONS NO) if (MPI_FOUND) add_library(ham_offload_mpi # SHARED if BUILD_SHARED_LIBS = TRUE @@ -60,7 +60,14 @@ if (MPI_FOUND) target_compile_definitions(ham_offload_mpi_rma_dynamic PUBLIC -DHAM_COMM_MPI_RMA_DYNAMIC=1) target_link_libraries(ham_offload_mpi_rma_dynamic PUBLIC ham_interface mpi_library) - set_target_properties(ham_offload_mpi ham_offload_mpi_explicit ham_offload_mpi_rma_dynamic PROPERTIES + add_library(ham_offload_mpi_rma_dynamic_data_only # SHARED if BUILD_SHARED_LIBS = TRUE + ${HAM_LIB_SRC} + offload/main.cpp + net/communicator_mpi_rma_dynamic_data_only.cpp) + target_compile_definitions(ham_offload_mpi_rma_dynamic_data_only PUBLIC -DHAM_COMM_MPI_RMA_DYNAMIC=1) + target_link_libraries(ham_offload_mpi_rma_dynamic_data_only PUBLIC ham_interface mpi_library) + + set_target_properties(ham_offload_mpi ham_offload_mpi_explicit ham_offload_mpi_rma_dynamic 
ham_offload_mpi_rma_dynamic_data_only PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED YES CXX_EXTENSIONS NO)