Skip to content

Commit

Permalink
Better rvalues support for parallel_reduce algorithm
Browse files Browse the repository at this point in the history
Signed-off-by: Konstantin Boyarinov <[email protected]>
  • Loading branch information
kboyarinov committed Feb 1, 2024
1 parent 4dfcb6d commit bb26935
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 22 deletions.
45 changes: 23 additions & 22 deletions include/oneapi/tbb/parallel_reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ inline namespace d0 {

template <typename Body, typename Range>
concept parallel_reduce_body = splittable<Body> &&
requires( Body& body, const Range& range, Body& rhs ) {
requires( Body& body, const Range& range, Body&& rhs ) {
body(range);
body.join(rhs);
body.join(std::move(rhs));
};

template <typename Function, typename Range, typename Value>
Expand Down Expand Up @@ -390,14 +390,15 @@ class lambda_reduce_body {
, my_value(other.my_identity_element)
{ }
void operator()(Range& range) {
my_value = tbb::detail::invoke(my_real_body, range, const_cast<const Value&>(my_value));
my_value = tbb::detail::invoke(my_real_body, range, std::move(my_value));
}

void join( lambda_reduce_body& rhs ) {
my_value = tbb::detail::invoke(my_reduction, const_cast<const Value&>(my_value),
const_cast<const Value&>(rhs.my_value));
my_value = tbb::detail::invoke(my_reduction, std::move(my_value), std::move(rhs.my_value));
}
Value result() const {
return my_value;

Value&& result() noexcept {
return std::move(my_value);
}
};

Expand All @@ -411,7 +412,7 @@ class lambda_reduce_body {
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
and accumulating the result
- \code void Body::join( Body& b ); \endcode Join results.
- \code void Body::join( Body&& b ); \endcode Join results.
The result in \c b should be merged into the result of \c this
**/

Expand Down Expand Up @@ -514,7 +515,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
::run(range, body, __TBB_DEFAULT_PARTITIONER() );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction and simple_partitioner.
Expand All @@ -527,7 +528,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run(range, body, partitioner );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction and auto_partitioner
Expand All @@ -540,7 +541,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction and static_partitioner
Expand All @@ -553,7 +554,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
::run( range, body, partitioner );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction and affinity_partitioner
Expand All @@ -566,7 +567,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction, default partitioner and user-supplied context.
Expand All @@ -579,7 +580,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction, simple partitioner and user-supplied context.
Expand All @@ -592,7 +593,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run( range, body, partitioner, context );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction, auto_partitioner and user-supplied context
Expand All @@ -605,7 +606,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner, context );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction, static_partitioner and user-supplied context
Expand All @@ -618,7 +619,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
::run( range, body, partitioner, context );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
Expand All @@ -631,7 +632,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner, context );
return body.result();
return std::move(body.result());
}

//! Parallel iteration with deterministic reduction and default simple partitioner.
Expand Down Expand Up @@ -704,7 +705,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity,
lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
::run(range, body, partitioner);
return body.result();
return std::move(body.result());
}

//! Parallel iteration with deterministic reduction and static partitioner.
Expand All @@ -716,7 +717,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity,
lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
::run(range, body, partitioner);
return body.result();
return std::move(body.result());
}

//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
Expand All @@ -739,7 +740,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity,
lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
::run(range, body, partitioner, context);
return body.result();
return std::move(body.result());
}

//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
Expand All @@ -752,7 +753,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity,
lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
::run(range, body, partitioner, context);
return body.result();
return std::move(body.result());
}
//@}

Expand Down
71 changes: 71 additions & 0 deletions test/conformance/conformance_parallel_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "common/test_invoke.h"

#include "../tbb/test_partitioner.h"
#include <vector>

//! \file conformance_parallel_reduce.cpp
//! \brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification
Expand Down Expand Up @@ -56,6 +57,37 @@ struct ReduceBody {
}
};

template <class T>
struct vector_wrapper : std::vector<T> {
using base_vector = std::vector<T>;

struct non_empty_exception {};

using base_vector::base_vector;
vector_wrapper() = default;

vector_wrapper(vector_wrapper&&) = default;
vector_wrapper& operator=(vector_wrapper&&) = default;

vector_wrapper(const vector_wrapper& other)
: base_vector(other)
{
if (!other.empty()) {
throw non_empty_exception{};
}
}

vector_wrapper& operator=(const vector_wrapper& other) {
if (this != &other) {
if (!other.empty()) {
throw non_empty_exception{};
}
this->operator=(other);
}
return *this;
}
}; // struct vector_wrapper

template <class Partitioner>
void TestDeterministicReductionFor() {
const int N = 1000;
Expand Down Expand Up @@ -174,3 +206,42 @@ TEST_CASE("parallel_[deterministic_]reduce and std::invoke") {
}

#endif

TEST_CASE("test rvalue optimization") {
constexpr std::size_t n_vectors = 10000;
std::vector<vector_wrapper<int>> vector_of_vectors;
auto init = {1, 2, 3, 4, 5};

vector_of_vectors.reserve(n_vectors);
for (std::size_t i = 0; i < n_vectors; ++i) {
vector_of_vectors.emplace_back(vector_wrapper<int>(init));
}

oneapi::tbb::blocked_range<std::size_t> range(0, n_vectors);

auto reduce_body = [&](const oneapi::tbb::blocked_range<std::size_t>& rng, vector_wrapper<int>&& x) {
vector_wrapper<int> new_vector = std::move(x);
for (std::size_t index = rng.begin(); index != rng.end(); ++index) {
new_vector.reserve(vector_of_vectors[index].size());
new_vector.insert(new_vector.end(), std::make_move_iterator(vector_of_vectors[index].begin()), std::make_move_iterator(vector_of_vectors[index].end()));
}
return new_vector;
};

auto join_body = [](vector_wrapper<int>&& x, vector_wrapper<int>&& y) {
vector_wrapper<int> new_vector = std::move(x);
new_vector.reserve(new_vector.size() + y.size());
new_vector.insert(new_vector.end(), std::make_move_iterator(y.begin()), std::make_move_iterator(y.end()));
return new_vector;
};

vector_wrapper<int> result = oneapi::tbb::parallel_reduce(range, vector_wrapper<int>{}, reduce_body, join_body);

vector_wrapper<int> expected_vector;
expected_vector.reserve(5 * n_vectors);
for (std::size_t i = 0; i < n_vectors; ++i) {
expected_vector.insert(expected_vector.end(), init);
}

REQUIRE_MESSAGE(expected_vector == result, "Incorrect reduce result");
}

0 comments on commit bb26935

Please sign in to comment.