Skip to content

Commit

Permalink
Fix review comments
Browse files: browse the repository at this point in the history
Signed-off-by: Sergey Kopienko <[email protected]>
  • Loading branch information
SergeyKopienko committed Aug 9, 2024
1 parent 193983b commit 5d266b3
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1180,18 +1180,15 @@ struct __parallel_find_or_nd_range_tuner<oneapi::dpl::__internal::__device_backe
// If our work capacity is not enough to process all data in one iteration, tune the number of work-groups
if (__iters_per_work_item > 1)
{
auto __current_iters_per_work_item =
oneapi::dpl::__internal::__dpl_ceiling_div(__rng_n, __n_groups * __wgroup_size);

// Empirically found formula for GPU devices.
const auto __rng_x = __rng_n / 4096;
const auto __desired_iters_per_work_item = std::max(std::sqrt(__rng_x), 1.);

if (__current_iters_per_work_item < __desired_iters_per_work_item)
if (__iters_per_work_item < __desired_iters_per_work_item)
{
auto __k = __desired_iters_per_work_item / __current_iters_per_work_item;
__k = std::pow(2, std::ceil(std::log2(__k)));
__n_groups = (std::size_t)std::ceil(__n_groups / __k);
auto __k = oneapi::dpl::__internal::__dpl_bit_ceil(
(std::size_t)std::floor(__desired_iters_per_work_item / __iters_per_work_item));
__n_groups = oneapi::dpl::__internal::__dpl_ceiling_div(__n_groups, __k);

assert(oneapi::dpl::__internal::__dpl_ceiling_div(__rng_n, __n_groups * __wgroup_size) <=
__desired_iters_per_work_item);
Expand Down

0 comments on commit 5d266b3

Please sign in to comment.