From 4107ceda64558ea215136a6e71c4aa0fd9ebd9cc Mon Sep 17 00:00:00 2001 From: pavelkumbrasev Date: Thu, 21 Sep 2023 13:36:07 +0100 Subject: [PATCH] Bypass task to operator()() to remove extra synchronization complexities Signed-off-by: pavelkumbrasev --- .../fibonacci_single_task.h | 22 +++---- .../recursive_fibonacci/fibonacci_two_tasks.h | 16 ++--- .../task_emulation_layer.h | 58 +++++++++---------- 3 files changed, 44 insertions(+), 52 deletions(-) diff --git a/examples/migration/recursive_fibonacci/fibonacci_single_task.h b/examples/migration/recursive_fibonacci/fibonacci_single_task.h index 12be6644d7..61b96056af 100644 --- a/examples/migration/recursive_fibonacci/fibonacci_single_task.h +++ b/examples/migration/recursive_fibonacci/fibonacci_single_task.h @@ -39,10 +39,11 @@ struct single_fib_task : task_emulation::base_task { single_fib_task(int n, int* x) : n(n), x(x), s(state::compute) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; switch (s) { case state::compute : { - compute_impl(); + bypass = compute_impl(); break; } case state::sum : { @@ -51,35 +52,30 @@ struct single_fib_task : task_emulation::base_task { if (tesing_enabled) { if (n == cutoff && num_recycles > 0) { --num_recycles; - compute_impl(); + bypass = compute_impl(); } } break; } } + return bypass; } - void compute_impl() { + task_emulation::base_task* compute_impl() { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib_1(n); } else { - auto bypass = this->allocate_child_and_increment(n - 2, &x_r); + bypass = this->allocate_child_and_increment(n - 2, &x_r); task_emulation::run_task(this->allocate_child_and_increment(n - 1, &x_l)); // Recycling this->s = state::sum; this->recycle_as_continuation(); - - // Bypass is not supported by task_emulation and next_task executed directly. 
- // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable - // i.e. instead of Direct Body call - // submit task_emulation::run_task(this->allocate_child_and_increment<single_fib_task>(n - 2, &x_r)); - bypass->operator()(); } + return bypass; } diff --git a/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h b/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h index 9123662522..5d7fd02292 100644 --- a/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h +++ b/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h @@ -33,8 +33,9 @@ long serial_fib(int n) { struct fib_continuation : task_emulation::base_task { fib_continuation(int& s) : sum(s) {} - void execute() override { + task_emulation::base_task* execute() override { sum = x + y; + return nullptr; } int x{ 0 }, y{ 0 }; @@ -44,7 +45,8 @@ struct fib_computation : task_emulation::base_task { struct fib_computation : task_emulation::base_task { fib_computation(int n, int* x) : n(n), x(x) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib(n); } @@ -57,15 +59,9 @@ struct fib_computation : task_emulation::base_task { this->recycle_as_child_of(c); n = n - 2; x = &c.y; - - // Bypass is not supported by task_emulation and next_task executed directly. - // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable
- // i.e. instead of Recycling + Direct Body call - // submit task_emulation::run_task(c.create_child<fib_computation>(n - 2, &c.y)); - this->operator()(); + bypass = this; } + return bypass; } int n; diff --git a/examples/migration/recursive_fibonacci/task_emulation_layer.h b/examples/migration/recursive_fibonacci/task_emulation_layer.h index b1e96f0b78..b387a6fde1 100644 --- a/examples/migration/recursive_fibonacci/task_emulation_layer.h +++ b/examples/migration/recursive_fibonacci/task_emulation_layer.h @@ -47,41 +47,51 @@ class base_task { public: base_task() = default; - base_task(const base_task& t) : m_type(t.m_type.load()), m_parent(t.m_parent), m_ref_counter(t.m_ref_counter.load()) + base_task(const base_task& t) : m_type(t.m_type), m_parent(t.m_parent), m_ref_counter(t.m_ref_counter.load()) {} virtual ~base_task() = default; void operator() () const { base_task* parent_snapshot = m_parent; - std::uint64_t type_snapshot = m_type; + task_type type_snapshot = m_type; - const_cast<base_task*>(this)->execute(); + base_task* bypass = const_cast<base_task*>(this)->execute(); bool is_task_recycled_as_child = parent_snapshot != m_parent; bool is_task_recycled_as_continuation = type_snapshot != m_type; if (m_parent && !is_task_recycled_as_child && !is_task_recycled_as_continuation) { - auto child_ref = m_parent->remove_child_reference() & (m_self_ref - 1); - if (child_ref == 0) { + if (m_parent->remove_child_reference() == 0) { m_parent->operator()(); } } - if (type_snapshot != task_type::stack_based && const_cast<base_task*>(this)->remove_self_ref() == 0) { + if (type_snapshot != task_type::stack_based && !is_task_recycled_as_child && !is_task_recycled_as_continuation) { delete this; } + + if (bypass != nullptr) { + m_type = type_snapshot; + + // Bypass is not supported by task_emulation and next_task executed directly. + // However, the old-TBB bypass behavior can be achieved with + // `return task_group::defer()` (check Migration Guide). + // Consider submit another task if recursion call is not acceptable
+ // i.e. instead of Direct Body call + // submit task_emulation::run_task(); + bypass->operator()(); + } } - virtual void execute() = 0; + virtual base_task* execute() = 0; template <typename C, typename... Args> C* allocate_continuation(std::uint64_t ref, Args&&... args) { C* continuation = new C{std::forward<Args>(args)...}; - continuation->m_type = task_type::continuation; + continuation->m_type = task_type::allocated; continuation->reset_parent(reset_parent()); continuation->m_ref_counter = ref; - continuation->add_self_ref(); return continuation; } @@ -109,12 +119,12 @@ class base_task { template <typename C> void recycle_as_child_of(C& c) { + m_type = task_type::recycled; reset_parent(&c); } void recycle_as_continuation() { - add_self_ref(); - m_type += task_type::continuation; + m_type = task_type::recycled; } void add_child_reference() { @@ -126,21 +136,13 @@ class base_task { } protected: - void add_self_ref() { - m_ref_counter.fetch_add(m_self_ref); - } - - std::uint64_t remove_self_ref() { - return m_ref_counter.fetch_sub(m_self_ref) - m_self_ref; - } - - struct task_type { - static constexpr std::uint64_t stack_based = 1; - static constexpr std::uint64_t allocated = 1 << 1; - static constexpr std::uint64_t continuation = 1 << 2; + enum task_type { + stack_based, + allocated, + recycled }; - std::atomic<std::uint64_t> m_type; + mutable task_type m_type; private: template <typename F, typename... Args> @@ -161,7 +163,6 @@ class base_task { F* allocate_child_impl(Args&&... args) { F* obj = new F{std::forward<Args>(args)...}; obj->m_type = task_type::allocated; - obj->add_self_ref(); obj->reset_parent(this); return obj; } @@ -173,7 +174,6 @@ class base_task { } base_task* m_parent{nullptr}; - static constexpr std::uint64_t m_self_ref = std::uint64_t(1) << 48; std::atomic<std::uint64_t> m_ref_counter{0}; }; @@ -181,12 +181,13 @@ class root_task : public base_task { public:
root_task(tbb::task_group& tg) : m_tg(tg), m_callback(m_tg.defer([] { /* Create empty callback to preserve reference for wait. */})) { add_child_reference(); - m_type = base_task::task_type::continuation; + m_type = base_task::task_type::allocated; } private: - void execute() override { + base_task* execute() override { m_tg.run(std::move(m_callback)); + return nullptr; } tbb::task_group& m_tg; @@ -205,7 +206,6 @@ template <typename F, typename... Args> F* allocate_root_task(tbb::task_group& tg, Args&&... args) { F* obj = new F{std::forward<Args>(args)...}; obj->m_type = base_task::task_type::allocated; - obj->add_self_ref(); obj->reset_parent(new root_task{tg}); return obj; }