From 91ba7e7e194a27468771e03872facd0109612148 Mon Sep 17 00:00:00 2001 From: Pavel Kumbrasev Date: Mon, 25 Sep 2023 21:07:48 +0100 Subject: [PATCH] Fix race in single task Fibonacci example (#1209) * Enable multiple recycles * Extend fibonacci with continuation recursion * Bypass task to operator()() to remove extra synchronization complexities --------- Signed-off-by: pavelkumbrasev Co-authored-by: Dmitri Mokhov --- .../recursive_fibonacci/CMakeLists.txt | 2 +- .../migration/recursive_fibonacci/README.md | 5 +- .../recursive_fibonacci/fibonacci.cpp | 2 + .../fibonacci_single_task.h | 30 ++++++---- .../recursive_fibonacci/fibonacci_two_tasks.h | 16 ++--- .../task_emulation_layer.h | 59 ++++++++++++------- 6 files changed, 67 insertions(+), 47 deletions(-) diff --git a/examples/migration/recursive_fibonacci/CMakeLists.txt b/examples/migration/recursive_fibonacci/CMakeLists.txt index 5032da2330..3967a0a2a9 100644 --- a/examples/migration/recursive_fibonacci/CMakeLists.txt +++ b/examples/migration/recursive_fibonacci/CMakeLists.txt @@ -33,7 +33,7 @@ set(EXECUTABLE "$") # `N` - specifies the fibonacci number which would be calculated. # `C` - cutoff that will be used to stop recursive split. # `I` - number of iteration to measure benchmark time. -set(ARGS 30 16 20) +set(ARGS 30 16 20 1) set(PERF_ARGS 50 5 20) add_execution_target(run_recursive_fibonacci recursive_fibonacci ${EXECUTABLE} "${ARGS}") diff --git a/examples/migration/recursive_fibonacci/README.md b/examples/migration/recursive_fibonacci/README.md index bc66c5d814..1f0341c1ea 100644 --- a/examples/migration/recursive_fibonacci/README.md +++ b/examples/migration/recursive_fibonacci/README.md @@ -9,14 +9,15 @@ cmake --build . ## Running the sample ### Predefined make targets -* `make run_recursive_fibonacci` - executes the example with predefined parameters. +* `make run_recursive_fibonacci` - executes the example with predefined parameters (extended testing enabled). * `make perf_run_recursive_fibonacci` - executes the example with suggested parameters to measure the oneTBB performance. ### Application parameters Usage: ``` -recursive_fibonacci N C I +recursive_fibonacci N C I T ``` * `N` - specifies the fibonacci number which would be calculated. * `C` - cutoff that will be used to stop recursive split. * `I` - number of iteration to measure benchmark time. +* `T` - enables extended testing (recycle task in a loop). diff --git a/examples/migration/recursive_fibonacci/fibonacci.cpp b/examples/migration/recursive_fibonacci/fibonacci.cpp index acf22a497a..e4a7c12eb7 100644 --- a/examples/migration/recursive_fibonacci/fibonacci.cpp +++ b/examples/migration/recursive_fibonacci/fibonacci.cpp @@ -22,6 +22,7 @@ #include int cutoff; +bool testing_enabled; template std::pair measure(F&& f, @@ -48,6 +49,7 @@ int main(int argc, char* argv[]) { int numbers = argc > 1 ? strtol(argv[1], nullptr, 0) : 50; cutoff = argc > 2 ? strtol(argv[2], nullptr, 0) : 16; unsigned long ntrial = argc > 3 ? (unsigned long)strtoul(argv[3], nullptr, 0) : 20; + testing_enabled = argc > 4 ? (bool)strtol(argv[4], nullptr, 0) : false; auto res = measure(fibonacci_two_tasks, numbers, ntrial); std::cout << "Fibonacci two tasks impl N = " << res.first << " Avg time = " << res.second diff --git a/examples/migration/recursive_fibonacci/fibonacci_single_task.h b/examples/migration/recursive_fibonacci/fibonacci_single_task.h index 2467f862b0..dae8895b8c 100644 --- a/examples/migration/recursive_fibonacci/fibonacci_single_task.h +++ b/examples/migration/recursive_fibonacci/fibonacci_single_task.h @@ -24,6 +24,7 @@ #include extern int cutoff; +extern bool testing_enabled; long serial_fib_1(int n) { return n < 2 ? n : serial_fib_1(n - 1) + serial_fib_1(n - 2); @@ -38,39 +39,43 @@ struct single_fib_task : task_emulation::base_task { single_fib_task(int n, int* x) : n(n), x(x), s(state::compute) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; switch (s) { case state::compute : { - compute_impl(); + bypass = compute_impl(); break; } case state::sum : { *x = x_l + x_r; + + if (testing_enabled) { + if (n == cutoff && num_recycles > 0) { + --num_recycles; + bypass = compute_impl(); + } + } + break; } } + return bypass; } - void compute_impl() { + task_emulation::base_task* compute_impl() { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib_1(n); } else { - auto bypass = this->allocate_child_and_increment(n - 2, &x_r); + bypass = this->allocate_child_and_increment(n - 2, &x_r); task_emulation::run_task(this->allocate_child_and_increment(n - 1, &x_l)); // Recycling this->s = state::sum; this->recycle_as_continuation(); - - // Bypass is not supported by task_emulation and next_task executed directly. - // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable - // i.e. instead of Direct Body call - // submit task_emulation::run_task(this->allocate_child_and_increment(n - 2, &x_r)); - bypass->operator()(); } + return bypass; } @@ -79,6 +84,7 @@ struct single_fib_task : task_emulation::base_task { state s; int x_l{ 0 }, x_r{ 0 }; + int num_recycles{5}; }; int fibonacci_single_task(int n) { diff --git a/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h b/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h index 9123662522..5d7fd02292 100644 --- a/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h +++ b/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h @@ -33,8 +33,9 @@ long serial_fib(int n) { struct fib_continuation : task_emulation::base_task { fib_continuation(int& s) : sum(s) {} - void execute() override { + task_emulation::base_task* execute() override { sum = x + y; + return nullptr; } int x{ 0 }, y{ 0 }; @@ -44,7 +45,8 @@ struct fib_continuation : task_emulation::base_task { struct fib_computation : task_emulation::base_task { fib_computation(int n, int* x) : n(n), x(x) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib(n); } @@ -57,15 +59,9 @@ struct fib_computation : task_emulation::base_task { this->recycle_as_child_of(c); n = n - 2; x = &c.y; - - // Bypass is not supported by task_emulation and next_task executed directly. - // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable - // i.e. instead of Recycling + Direct Body call - // submit task_emulation::run_task(c.create_child(n - 2, &c.y)); - this->operator()(); + bypass = this; } + return bypass; } int n; diff --git a/examples/migration/recursive_fibonacci/task_emulation_layer.h b/examples/migration/recursive_fibonacci/task_emulation_layer.h index 3a38712765..7252d447a0 100644 --- a/examples/migration/recursive_fibonacci/task_emulation_layer.h +++ b/examples/migration/recursive_fibonacci/task_emulation_layer.h @@ -47,32 +47,45 @@ class base_task { public: base_task() = default; - base_task(const base_task& t) : m_parent(t.m_parent), m_child_counter(t.m_child_counter.load()) + base_task(const base_task& t) : m_type(t.m_type), m_parent(t.m_parent), m_child_counter(t.m_child_counter.load()) {} virtual ~base_task() = default; void operator() () const { - base_task* parent_snapshot = m_parent; - const_cast(this)->execute(); - if (m_parent && parent_snapshot == m_parent && m_child_counter == 0) { - if (m_parent->remove_reference() == 0) { + task_type type_snapshot = m_type; + + base_task* bypass = const_cast(this)->execute(); + + if (m_parent && m_type != task_type::recycled) { + if (m_parent->remove_child_reference() == 0) { m_parent->operator()(); - delete m_parent; } } - if (m_child_counter == 0 && m_type == task_type::allocated) { + if (m_type == task_type::allocated) { delete this; } + + if (bypass != nullptr) { + m_type = type_snapshot; + + // Bypass is not supported by task_emulation and next_task executed directly. + // However, the old-TBB bypass behavior can be achieved with + // `return task_group::defer()` (check Migration Guide). + // Consider submit another task if recursion call is not acceptable + // i.e. instead of Direct Body call + // submit task_emulation::run_task(); + bypass->operator()(); + } } - virtual void execute() = 0; + virtual base_task* execute() = 0; template C* allocate_continuation(std::uint64_t ref, Args&&... args) { C* continuation = new C{std::forward(args)...}; - continuation->m_type = task_type::continuation; + continuation->m_type = task_type::allocated; continuation->reset_parent(reset_parent()); continuation->m_child_counter = ref; return continuation; @@ -85,7 +98,7 @@ class base_task { template F create_child_and_increment(Args&&... args) { - add_reference(); + add_child_reference(); return create_child_impl(std::forward(args)...); } @@ -96,35 +109,36 @@ class base_task { template F* allocate_child_and_increment(Args&&... args) { - add_reference(); + add_child_reference(); return allocate_child_impl(std::forward(args)...); } template void recycle_as_child_of(C& c) { + m_type = task_type::recycled; reset_parent(&c); } void recycle_as_continuation() { - m_type = task_type::continuation; + m_type = task_type::recycled; } - void add_reference() { + void add_child_reference() { ++m_child_counter; } - std::uint64_t remove_reference() { + std::uint64_t remove_child_reference() { return --m_child_counter; } protected: enum class task_type { - created, + stack_based, allocated, - continuation + recycled }; - task_type m_type; + mutable task_type m_type; private: template @@ -136,7 +150,7 @@ class base_task { template F create_child_impl(Args&&... args) { F obj{std::forward(args)...}; - obj.m_type = task_type::created; + obj.m_type = task_type::stack_based; obj.reset_parent(this); return obj; } @@ -162,13 +176,14 @@ class base_task { class root_task : public base_task { public: root_task(tbb::task_group& tg) : m_tg(tg), m_callback(m_tg.defer([] { /* Create empty callback to preserve reference for wait. */})) { - add_reference(); - m_type = base_task::task_type::continuation; + add_child_reference(); + m_type = base_task::task_type::allocated; } private: - void execute() override { + base_task* execute() override { m_tg.run(std::move(m_callback)); + return nullptr; } tbb::task_group& m_tg; @@ -178,7 +193,7 @@ class root_task : public base_task { template F create_root_task(tbb::task_group& tg, Args&&... args) { F obj{std::forward(args)...}; - obj.m_type = base_task::task_type::created; + obj.m_type = base_task::task_type::stack_based; obj.reset_parent(new root_task{tg}); return obj; }